1. Installing and Loading Packages

This first code chunk installs and loads all the libraries needed for the analysis. The include=FALSE option in the chunk header keeps the installation commands and package-loading messages out of the final HTML report, making it cleaner.

2. Data Loading and Preparation

Here, we load the raw count table and gene annotations directly from the GEO repository.

# Load the raw count table from GEO.
# The download URL is the shared endpoint plus the series accession and the
# name of the counts file, joined as query parameters.
urld <- "https://www.ncbi.nlm.nih.gov/geo/download/?format=file&type=rnaseq_counts"
path <- paste(
  urld,
  "acc=GSE205748",
  "file=GSE205748_raw_counts_GRCh38.p13_NCBI.tsv.gz",
  sep = "&"
)
# Read every column as integer, then convert to a matrix whose row names are
# taken from the GeneID column.
gset <- as.matrix(
  data.table::fread(path, header = TRUE, colClasses = "integer"),
  rownames = "GeneID"
)

# Load the gene annotation table from GEO.
# `urld` already ends with the `type=rnaseq_counts` query parameter, so only
# the file name is appended here (repeating `type` would duplicate it).
apath <- paste(urld, "file=Human.GRCh38.p13.annot.tsv.gz", sep = "&")
# quote = "" disables quote handling while parsing; data.table = FALSE makes
# fread return a plain data.frame.
annot <- data.table::fread(
  apath,
  header = TRUE,
  quote = "",
  stringsAsFactors = FALSE,
  data.table = FALSE
)

# Fetch series GSE205748 from GEO (GSEMatrix and AnnotGPL both enabled) and
# keep the first element of the list that getGEO returns.
gse <- getGEO(GEO = "GSE205748", GSEMatrix = TRUE, AnnotGPL = TRUE)[[1]]
## Found 1 file(s)
## GSE205748_series_matrix.txt.gz
## Annotation GPL not available, so will use submitter GPL instead

# Phenotype table of the series, restricted to three columns: sample title,
# GEO accession and the "tissue type:ch1" characteristic.
pheno_data <- pData(gse)[, c("title", "geo_accession", "tissue type:ch1")]

3. Selection of 433 genes

Genes were selected by text mining and functional enrichment analysis.

# Define genes of interest: a character vector of gene symbols, all written in
# upper case, listed in alphabetical order.
# NOTE(review): symbol comparisons in R (`%in%`, `==`) are case-sensitive, and
# some official symbols are mixed case (e.g. open-reading-frame genes such as
# "C15orf48"), so entries like "C15ORF48" and "C7ORF57" may not match an
# annotation table exactly -- confirm against the table used for filtering.
genes_of_interest <- c(
  "ABCB11", "ABCD2", "ACE", "ACKR2", "ACP5", "ACP7", "ADAMTS9", "ADCY7", "ADGRL2", "ADIPOQ", "AHR", "AIM2", "AKAP13", "ALB", "ANKRD55", "ANXA1",
  "APOL1", "APOL6", "AQP1", "AREG", "ARHGEF3", "ATG16L1", "ATG5", "ATOX1", "ATXN2L", "BEND2", "BGLAP", "BMP2", "BMP7", "BNIP3L", "BRAF", "BTG1",
  "C15ORF48", "C7ORF57", "CALD1", "CAPS2", "CARD9", "CASP1", "CASP10", "CAST", "CCL2", "CCL20", "CCL25", "CCL3", "CCL5", "CCND1", "CCND3", "CCR2",
  "CCR6", "CCR7", "CCRL2", "CD14", "CD160", "CD19", "CD274", "CD28", "CD36", "CD38", "CD3E", "CD4", "CD40", "CD40LG", "CD52", "CD63", "CD68",
  "CD69", "CD80", "CD83", "CD86", "CD8A", "CD8B", "CDC42BPB", "CEBPA", "CEBPG", "CENPK", "CLEC2B", "CLEC4D", "CLIC3", "CMAHP", "CMTM2",
  "COL1A1", "COX2", "CRB1", "CREM", "CRP", "CSF2", "CSMD1", "CSN3", "CSNK1A1", "CTLA4", "CTSB", "CTSK", "CUX1", "CX3CR1", "CXCL10", "CXCL13",
  "CXCL16", "CXCL2", "CXCL8", "CXCR2", "CYCS", "CYP1A1", "CYP4F22", "DDIT3", "DDX60", "DKK1", "DLAT", "DNAJA2", "DNAJB6", "DUSP4", "DYSF",
  "EDNRA", "EFCAB13", "EFCAB7", "EGF", "EGFR", "EGR3", "EIF5B", "ENO1", "EOMES", "ERAP1", "ERAP2", "ERN1", "ERP44", "EZH2", "FAXDC2", "FCGR1A",
  "FCGR2A", "FCGR3A", "FGB", "FNBP1", "FOS", "FOSL1", "FOSL2", "FOXP3", "FRZB", "FUT2", "GAPDH", "GATA3", "GBP1", "GBP3", "GBP5", "GEM", "GINS1",
  "GJB2", "GJB6", "GLUL", "GOLIM4", "GPR35", "GPT", "GZMA", "GZMB", "GZMK", "HCAR3", "HERC6", "HHAT", "HIF1A", "HK2", "HLA-A", "HLA-B", "HLA-C",
  "HLA-DQB1", "HLA-DRB1", "HSPA5", "HSPA6", "HYAL4", "ICAM1", "ICOS", "IFI16", "IFI6", "IFIH1", "IFIT3", "IFNA1", "IFNG", "IFNGR1", "IFNLR1",
  "IGF2", "IGF2-AS", "IL10", "IL12B", "IL13", "IL15", "IL17A", "IL17F", "IL17RA", "IL18", "IL1A", "IL1B", "IL1F10", "IL1R1", "IL1RN", "IL2",
  "IL21", "IL21-AS1", "IL22", "IL23A", "IL23R", "IL2RA", "IL33", "IL36RN", "IL37", "IL4", "IL5", "IL6", "IL6R", "IL7", "INS", "INS-IGF2", "IRAK1",
  "IRS1", "ITGA2B", "ITGAL", "ITGAM", "ITGAX", "JAK2", "JAK3", "JDP2", "JRKL", "JUN", "JUNB", "JUND", "KANK4", "KDM5B", "KIR2DS1", "KIR3DL1",
  "KIR3DL2", "KLRB1", "LAMP1", "LAMP2", "LEP", "LILRA5", "LILRB2", "LINC01185", "LINC01250", "LMO7", "LPAL2", "LRRK2", "LTA", "LURAP1L",
  "LURAP1L-AS1", "LYZ", "MAF", "MBL2", "MCAM", "MCL1", "MEFV", "MICA", "MIR146A", "MIR21", "MIX23", "MMP1", "MMP3", "MMP7", "MMP9", "MRPS23",
  "MSN", "MTHFR", "MUCL1", "MX1", "MYC", "MYNN", "NABP1", "NAMPT", "NCOA7", "NDUFS1", "NFKB1", "NKG7", "NLRP3", "NMI", "NOD2", "NOS2",
  "NOXRED1", "NPEPPS", "NRG1", "NT5C3A", "OLR1", "OSMR", "PDCD1", "PDLIM7", "PER1", "PFDN4", "PFDN5", "PFKL", "PGD", "PGK1", "PHEX", "PI3",
  "PIK3CD", "PINK1", "PLA2G4D", "PLCG1", "PLG", "PLIN5", "PLS1", "PPARD", "PPARG", "PPARGC1A", "PPARGC1B", "PRDM1", "PRF1", "PRTN3", "PSG2",
  "PSMC2", "PSMD7", "PSME2", "PTGS1", "PTGS2", "PTH", "PTPN22", "PTX3", "PYGL", "RAC1", "RBM45", "REL", "RETN", "RGPD6", "RIT1", "RORC", "RPL15",
  "RPL36AL", "RPL41", "RPL7", "RPS19", "RPS21", "RPS26", "RPS6KB1", "RPS7", "RSAD2", "RUNX2", "RUNX3", "S100A12", "S100A8", "S100A9", "S100P",
  "SAA1", "SAMD9", "SAR1A", "SCN1A", "SEC14L2", "SEC24B", "SELL", "SERPINA1", "SERPINB1", "SERPINE1", "SF3B1", "SF3B3", "SGK1", "SH3BGRL3",
  "SIAH1", "SLC1A2", "SLC2A3", "SLC51B", "SLC7A11", "SLC7A5", "SMAD3", "SMARCA4", "SMOX", "SOCS1", "SOD2", "SOST", "SOX4", "SP7", "SPCS3",
  "SPON2", "SPP1", "SSR1", "STAT1", "STAT3", "STIM1", "SUOX", "SYT1", "TALDO1", "TBX21", "TEK", "TFPI", "TGFA", "TGFB1", "TGFBR3", "TIMP1",
  "TLR2", "TLR3", "TLR4", "TLR9", "TMBIM6", "TMEM45A", "TMPRSS11B", "TNF", "TNFAIP3", "TNFAIP6", "TNFAIP8", "TNFRSF10A", "TNFRSF1A",
  "TNFRSF1B", "TNFRSF9", "TNFSF10", "TNFSF11", "TNFSF13B", "TNFSF15", "TNIP1", "TOMM5", "TOMM7", "TPST2", "TPT1", "TRAF2", "TRAF3IP2", "TRAF4",
  "TRAF5", "TRBV20OR9-2", "TRIM22", "TRIM69", "TTC39B", "TYK2", "TYMP", "UBE2L3", "UQCR10", "UTP11", "VCAM1", "VDR", "VEGFA", "VIM", "WDR1",
  "WNK1", "WWOX", "XAF1", "XBP1", "YOD1", "YWHAB", "ZC3H12A", "ZFP36", "ZMIZ1", "ZNF316", "ZNF415", "ZNF483"
)

# Load the data.table package
library(data.table)

# Keep only the annotation rows for the genes of interest.
# Symbols are compared case-insensitively: the gene list is written entirely
# in upper case (e.g. "C15ORF48"), and an exact, case-sensitive match would
# silently drop any gene whose annotation symbol is mixed case.
wanted_symbols <- toupper(genes_of_interest)
annot_symbols <- toupper(annot$Symbol)
annot_filtered <- annot[annot_symbols %in% wanted_symbols, ]

# Surface requested symbols that have no annotation row at all (aliases,
# retired names, typos) instead of losing them without notice.
unmatched_symbols <- genes_of_interest[!(wanted_symbols %in% annot_symbols)]
if (length(unmatched_symbols) > 0) {
  warning(
    "Genes of interest not found in the annotation table: ",
    paste(unmatched_symbols, collapse = ", "),
    call. = FALSE
  )
}

# Filter the count matrix using GeneIDs as the key. drop = FALSE keeps the
# result a matrix even when only one gene matches.
expr_data_filtered <- gset[
  rownames(gset) %in% annot_filtered$GeneID, ,
  drop = FALSE
]

# GeneIDs (the current row names) of the retained genes
express <- rownames(expr_data_filtered)

# Mapping Entrez IDs to gene symbols
gene_symbols <- mapIds(
  org.Hs.eg.db,
  keys = express,
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# IDs that org.Hs.eg.db cannot map come back as NA; fall back to the symbol
# from the GEO annotation table so that no row ends up with an NA name.
unmapped <- is.na(gene_symbols)
gene_symbols[unmapped] <- annot_filtered$Symbol[
  match(express[unmapped], annot_filtered$GeneID)
]
# Replace rownames with gene symbols
rownames(expr_data_filtered) <- gene_symbols

# View the updated matrix
print("Data frame com rownames modificados para símbolos de genes:")
## [1] "Data frame com rownames modificados para símbolos de genes:"
print(expr_data_filtered)
##             GSM6222612 GSM6222613 GSM6222614 GSM6222615 GSM6222616 GSM6222617
## TNFRSF9              2         27         10          0          7          7
## ENO1             10652      14830      16786      13529      13523      10798
## PIK3CD             284        505        494        224        350        474
## PGD               1362       1863       2380       2955       3061       2001
## MTHFR              724       1582       1712        580       1122        935
## TNFRSF1B           317        700        634        303        401        663
## PINK1             1716       1914       2019       2979       2458       2495
## IFNLR1             449        385        473        259        586        345
## RUNX3             1015        898       1250        567        946        794
## SH3BGRL3          2089       3566       3529       4353       3158       4566
## CD52                87        272        211        198        123        266
## IFI6               347        367       1070        456        299        939
## ZC3H12A            536        569        679        581       1941        511
## UTP11              542        852        865        825        677        805
## JUN               1526       1918       1944       1239       2230       2325
## KANK4               27         58         85        125        165        100
## EFCAB7             144        268        298        164        200        262
## IL23R                1          1          5          0          3          4
## ADGRL2             550        641        680       1004       1324        896
## GBP3               325        507        360        369        530        447
## GBP1               260        568        649        323        618        884
## GBP5                27         59         76         21         16        107
## TGFBR3            1078       1971       1804       1344       2673       2115
## VCAM1               49        400        279        105        132        223
## PTPN22              32         46         45         35         37         80
## CD160               11         33         51         18         24         18
## FCGR1A               2          9         21          7          6          8
## MCL1              3897       4888       5258       4524       7500       6658
## CTSK              2143      11482      10318       3137       4412       9330
## RORC               961        944       1418       1313       1406       1344
## S100A9              77        430        331        248       5157        323
## S100A12              1          5          5          0         10         10
## S100A8              65        434        388        386       3893        185
## IL6R               434        427        330        288        507        527
## RIT1               435        634        632        594        679        665
## BGLAP               28         64         49         26         21         37
## IFI16             1985       2513       2692       1648       2845       3002
## AIM2                 3         16         10          8         13         10
## CRP                  0          1          0          0          0          0
## FCGR2A             107        348        209        101        151        291
## HSPA6              271        426        331        168        256        392
## FCGR3A              33        102         59         98         48        102
## SELL                20         56         71         15         24        101
## GLUL              6926       8315       9542       6633      10285      10338
## PTGS2               37         49         50         89        106         56
## CRB1                 9          2          4          7          3          6
## KDM5B             1691       1950       2290       1748       3407       2073
## IL10                 2          6          2          2          3          3
## YOD1              1011       2294       2456       2773       2129       2525
## HHAT               130        127        232        182        331        240
## TRAF5              270        509        361        249        246        487
## NLRP3               25         38         37         18         50         55
## LINC01250            0          0          4          0          0          1
## RPS7             12403      14409      16815      23213      12717      24182
## RSAD2               35         59        118         54         67        129
## FOSL2             5780       6432       8017       4807      10172       6622
## REL                978        761        780        704       1782        881
## TGFA               512        609        662       1286       1277        940
## DYSF               151        419        491        240        256        355
## HK2                418        723        942       1040       1490        677
## CD8A                38        111         62         48         86        169
## CD8B                26         61         34         32         75         82
## EIF5B             3619       5862       5931       3400       5065       4598
## IL1R1             1832       1908       2038       1587       3054       2597
## RGPD6              626        530        484        518        658        744
## IL1A                17        135         46         56        128         32
## IL1B                 7         29         12         49         96          9
## IL37              1340       2783       2964       2765       1874       2715
## IL36RN            1537       2646       2899       3273       2480       3173
## IL1F10             128        158        237        144        127        195
## IL1RN             2209       3189       3008       2643       2377       2946
## NMI                269        490        587        421        519        669
## TNFAIP6             25        150        150         30         76        135
## IFIH1              287        522        500        364        553        337
## SCN1A                1          1          2          5          5          4
## ABCB11              11         44         44         82        122         33
## RBM45              144        207        281        198        231        263
## FRZB               416        646       1027        781        546        890
## TFPI               278        732        624        306        381        632
## STAT1             1429       1638       1976       1343       2470       2558
## NABP1              165        349        378        448        340        343
## SF3B1             6174       7041       7886       6575       8777       8635
## CASP10             310        455        630        280        414        517
## CD28                 4         27         12          6         24         47
## CTLA4                4         29         11          6          4          8
## ICOS                 1         21          2          2          2         12
## NDUFS1            1476       1844       2161       2115       3360       1951
## CXCR2               84        172        138         96        176        137
## IRS1               256        735        601        580        800        800
## CCL20                0         38         34         19         18          3
## ATG16L1            859        856       1089        768       1256        913
## GPR35               48         68        126         18         65         70
## PDCD1                5         14         11          1          4         23
## PPARG               51        208        197        826        534        312
## RPL15            21497      29981      31081      38260      25740      45023
## EOMES                3         16          8          5          8         14
## CX3CR1              71        119        134         59         87        120
## ACKR2               21         68         50         34         61         48
## CCR2                33         71         88         60         45         95
## CCRL2                7         23         20         12          3         18
## TLR9                17         28         32         14         13         18
## ARHGEF3            483        641        740        536        925       1039
## ADAMTS9            102        311        218        191        206        257
## TMEM45A          10685      21584      18471      20104      18720      15315
## CD80                 2          3          1          0          1          1
## CD86                61        111        130         69        101        153
## MIX23              169        337        363        315        215        345
## PLS1                88         99        135         95        199        150
## PTX3                15         43         36         11         12         49
## GOLIM4             486        892        944        587       1016       1077
## MYNN               444        544        575        557        734        658
## TNFSF10            965       1436       1636       1155       1505       2040
## ADIPOQ              17        417        157        546         47       1233
## SPON2              607       1285       1356       1162        926       2071
## S100P              252         89        431        370        122        151
## WDR1              4297       7813       7154       5565       6405       6187
## CD38                 2         10         10          6          0         11
## PPARGC1A           376        188        337        499        491        312
## TMPRSS11B            0          0          0          0          0          0
## CSN3                 0          0          1          0          0          0
## ALB                 14          9         16          6          8         16
## CXCL8                3          5          6          2          8          5
## CXCL2               12         14         13          8          2          5
## AREG               101         81         72        152        155         71
## CXCL10               5         36         57          5          0         77
## CXCL13               2          2          0          0          9          1
## SPP1                 1          2          6          7          7          7
## HERC6              137        177        310        124        285        282
## NFKB1             1228       1676       1620       1207       2149       1699
## SEC24B             914       1169       1313       1082       1515       1356
## EGF                 49         80         77         87         63        139
## IL2                  0          2          0          1          0          2
## IL21                 0          0          0          0          0          0
## IL21-AS1             0          0          0          0          1          0
## SLC7A11             60        154        125         47        163        115
## IL15                33        135        118         85         45        149
## EDNRA              173        413        402        337        263        569
## TLR2               102        139        169        200        458        203
## FGB                  0          0          0          0          0          1
## DDX60              661        617        769        454       1059        814
## SPCS3             1440       1849       1933       2314       2433       2693
## TLR3                98        104        107         94        152        207
## OSMR               562        737        739        583        922        697
## GZMK                14         57         35          5         24         41
## GZMA                12         48         83         18         20         96
## ANKRD55              7          5          3          6          4          3
## CENPK               56        153        161        114        114        145
## CAST              7657       8059       8664       8059      11764       9845
## ERAP1             2227       2627       2237       1506       2924       2018
## ERAP2              140        325        781        168        255        854
## TNFAIP8            592       1133       1156       1113       1135       1462
## CSF2                 1          0          0          2          0          3
## IL5                  0          4          1          0          1          3
## IL13                 0          4          1          0          0          0
## IL4                  3          5          4          1          1          3
## CD14               236       1028        848        288        388        807
## CSNK1A1           6439       9369       9762       8725      12383      10761
## PPARGC1B           275        299        318        302        770        256
## TNIP1             1611       2682       2651       2689       2791       3532
## ATOX1              300        460        537        721        430        631
## FAXDC2             781       1800       1761       3309       3067       1444
## IL12B                3          2          1          1          6          1
## MIR146A              2          3          0          2          1          1
## PDLIM7             739       1980       1505       1405        740       1311
## SERPINB1           602        826        885        652        735       1247
## SSR1              1961       3147       3473       2409       3573       3278
## CD83               105        260        208        147        196        206
## SOX4              1401       1741       1788       1136       1311       2054
## CMAHP              515        976       1471        728        711       1030
## HLA-A            16390      19941      26976       8603      21863      24629
## HLA-C            15998      17888      30681      16498      14203      22005
## HLA-B            23266      30506      41190      16353      31062      36087
## MICA               267        675        313        368        332        510
## LTA                 14         19         22          4         18          9
## TNF                 59        112         83         57         57         77
## HLA-DRB1          2595       3529       4884       3470       2545       6918
## HLA-DQB1           642       1137       1969       1580        947       2522
## PPARD             1049       1791       2129       1499       1924       1470
## CCND3              942       1028       1293        941       1150       1089
## VEGFA              967       1072       1396        890       1053        821
## RUNX2              119        159        147         85        147        174
## IL17A                0          0          0          0          2          0
## IL17F                0          0          0          0          7          0
## PRDM1              537       1034       1020       1168       1182        825
## ATG5               375        705        748        760        648        988
## TRAF3IP2           917       1361       1406       1100       1352       1547
## NCOA7              361        547        445        586        739        785
## SGK1              1565       2169       3636       2217       1912       2680
## IFNGR1            1516       2870       2904       1854       2439       2603
## TNFAIP3            511        863        867        584       1211       1178
## SOD2              2289       4500       3961       3418       4414       3782
## LPAL2                7         30          7         16         10         15
## PLG                  5         12          6          6          5          3
## CCR6                38        121         90         68        155        152
## RAC1              6049      12161      11758       7544       8667       9676
## ZNF316            1654       3285       4517       1393       2196       2255
## AHR               1398       2252       2135       2191       3470       3029
## IL6                  3          6          2          2          0          8
## TOMM7             3531       3634       4691       5514       3022       7544
## CYCS              1488       2983       3267       3585       2817       2688
## AQP1              5450      10163       9168       6306       6375      11991
## NT5C3A             593       1032       1386       1540       1104       1158
## EGFR              6153       5020       6337       5107      11485       7981
## CD36               435       1856       1553       1559       1407       2148
## SAMD9              106        128        136        179        313        240
## SERPINE1            73        177        158         86        142         92
## CUX1              1393       1771       2134       1294       2659       2268
## PSMC2             1659       2762       3176       2476       2419       2617
## NAMPT             1078       1491       1570       1572       2462       1468
## HYAL4               33         10         72         20         40          7
## LEP                  7        134         18        217          2        444
## CALD1             4068       7827       4959       4136       3886       5766
## BRAF               738        832        901        892       1103       1125
## EZH2               462        677        912        655        695        709
## DNAJB6            2665       4612       5033       3998       4649       4326
## CSMD1               72         93          8         35         23         28
## CTSB              4344      11677       9495      10596      11015       9114
## EGR3              1600       2577       2564       2735       3526       1942
## TNFRSF10A          131        111        143         95        179        154
## BNIP3L            2339       4157       4438       6071       4658       4634
## DUSP4              443        399        343       1118        751        336
## NRG1               279        245        331        172        344        241
## RPL7             23865      29483      30936      39818      26781      45705
## IL7                 96        222        271        215        269        330
## GEM                119        430        439        234        291        440
## MYC               1007       1784       2958       1603       2769       2171
## GPT                483        947       1063       2011       1572        708
## JAK2               724        634        638        492        979        757
## CD274               16         49         15         13         27         49
## IL33              1053       1846       1451       1170       1427       1781
## LURAP1L-AS1          2          3          9          4          7          3
## LURAP1L            275        489        551        367        262        558
## TTC39B             753       1117       1319       2583       2510       1237
## IFNA1                0          0          0          0          0          0
## TEK                146        276        251        176        204        314
## TRBV20OR9-2          0          0          0          0          2          0
## TOMM5              448       1011       1034       1272        702       1144
## ANXA1             5102       9731      11536       8457       7094      10526
## ERP44              784       1502       1767       1152       1388       1459
## ZNF483             224        197        249        158        110        298
## TNFSF15             21         30         18         17         43         16
## TLR4               104        282        281        163        215        478
## PTGS1             6519       6887       7433       4029       8774       6559
## HSPA5             3746       7806       8964       4431       9110       5080
## FNBP1             1310       2378       2775       1594       2022       2452
## CARD9              100        236        214        101        150        222
## TRAF2              268        587        455        354        405        464
## CLIC3             1259       1814       1747       1664       1372       2075
## IL2RA                8         33          5          5         16         34
## GATA3             5874       9882      11013       4583       7590       8040
## VIM              18269      36935      28686      23620      20566      47978
## CREM               155        352        352        316        273        373
## DKK1                16         23         28         27         25         20
## MBL2                 0          0          0          0          0          0
## SAR1A             1796       2423       2599       2078       2337       2866
## PRF1                17         74         58         18         24         72
## ZMIZ1             1807       3367       3689       1718       3932       2542
## IFIT3              241        477        533        288        308        620
## TALDO1            1983       3126       3398       4633       3613       3403
## IGF2               252        315        404        256        331        499
## INS-IGF2           229        304        378        255        293        478
## IGF2-AS              3          1          2          1          2          1
## INS                  0          0          0          0          0          0
## STIM1             2224       3008       3673       1334       3360       2629
## TRIM22             434        945        954        402        796       1123
## PTH                  0          0          0          0          0          0
## SAA1                 3         34         73        570        162        315
## SLC1A2              50         61         35         51         85         70
## FOSL1               26         19         41         44         34         47
## CCND1             6066      12188      12544       5333      10977      11583
## JRKL               238        314        239        216        345        257
## MMP7               715        118        912        210        149        326
## MMP1                 6          0          4          1          4          2
## MMP3                15          1          6          6          4          2
## CASP1              591       1249       1315       1043       1062       1536
## DLAT               562        686        821        863       1139        795
## IL18              2542       3117       3084       3509       2853       3467
## CD3E                41        167         79         38         83        179
## MCAM              1176       3184       2338       2243       1632       2121
## WNK1              5021       4979       5457       4006      10570       6039
## TNFRSF1A          2526       3104       3658       2510       3670       3717
## GAPDH            20302      25743      31808      35503      22529      30124
## CD4                330        854        850        257        421        780
## SLC2A3             165        234        219        119        195        282
## CLEC4D               1          3          0          0          0          0
## KLRB1                6         28         40         16         10         50
## CD69                 5         60         39         26         46        104
## CLEC2B            1143       1569       2368       1792       1718       2601
## OLR1                 0          3          1          3          6          0
## ABCD2               10         35         32         36         14         91
## LRRK2              357        332        352        233        750        516
## VDR               1130       1617       1893       1529       2380       1690
## TMBIM6            9051      12982      14532      16482      18086      13529
## PFDN5             3197       4740       5065       6694       3655       7516
## SP7                  2          0          3          5          0          1
## MUCL1             2549       4340       6798       9638       2343       5597
## CD63              4955      11359      10139       9141       5549      12889
## SUOX              2304       3391       3708       2546       3384       2862
## RPS26              517        926       1055       1117       2980       5144
## RPL41             7939      10874      12532      15880       8644      17038
## IL23A               13         16         35         19          8         15
## DDIT3              301        382        435        348        238        454
## IFNG                 0          1          3          0          1          0
## IL22                 0          0          0          0          0          0
## LYZ                531       1091        995       1268        654       2287
## CAPS2               67         55         71         84         75        111
## SYT1                 3         43         44         37         20         21
## BTG1              7162      10685      12504       7694       8283      10888
## HCAR3              198        245        487        364        263        479
## GJB2              1349       1380       3749       2818       2806       2128
## GJB6              1029        887       3263       2184       1349       2338
## TNFSF11              0         12          7         22         15         19
## TPT1             51220      70418      78301     115159      57482     116850
## LMO7              1303       1354       1523       1372       1895       1528
## TNFSF13B            68        171        154         97         69        241
## LAMP1             5840      10475      13266       5663       9596       8068
## PSME2              832       1411       1653       1412       1044       1684
## GZMB                 3          7         12          3          7         34
## RPL36AL           2505       4263       4613       4166       2787       6001
## PYGL              1228       1485       1353       1093       1665       1129
## HIF1A             1322       1051       1401       1271       1832       1540
## FOS                986       1366       4585       1189       3074       5074
## JDP2               711       1054       1216        719       1143       1563
## NOXRED1             37         35         38         29         53         80
## SERPINA1            21         74        267         42         54        128
## CDC42BPB          2309       3761       4232       1678       4605       2795
## PLA2G4D            294        734       1161        372       1285        588
## TRIM69              13         22         42         25         26         21
## SLC51B              14         31         13         17         14         28
## SMAD3             1185       1781       1988       1019       1595       1626
## CYP1A1              20         62         49         37         11         41
## AKAP13            2497       2896       3052       1949       4454       3359
## MEFV                 2         21         17          6          6          7
## SOCS1               50         96         89        109         75        185
## ATXN2L            1677       3708       4833       1668       2827       2343
## CD19                 2         10         11          6         10          2
## ITGAL               38        199        151         53        118        216
## ITGAM               70        324        277         86        137        299
## ITGAX               73        255        215         90         72        223
## DNAJA2            1570       2198       2392       2084       2276       2321
## SIAH1              746       1429       1500        964        904       1281
## ADCY7              808       1022       1154        714       1597       1236
## NOD2               418        736        576        422        558        527
## CMTM2                4          7          7          7          6          6
## SF3B3             2215       3254       3387       2211       4454       3266
## PSMD7             1877       2940       3149       2906       2766       2993
## WWOX               242        204        223        176        182        240
## MAF               6116       9500      10961       4472       8032       9876
## SLC7A5             671       1313       1837       1497       2438        874
## CXCL16             630       1178       1027        879        822        894
## XAF1               420        604       1293        409        536       1001
## CD68               289       1025        869        383        467       1052
## PER1              3195       7686      13116       2205       5150       7410
## NOS2                 4          2          5          1          0         18
## TRAF4              526        543        846        826        548        583
## CCL2               206        215        217        183        281        289
## CCL5                55        127        145         44         73        195
## CCL3                 0          8          6          4          1         10
## CCR7                19         35         37         20         45         20
## STAT3             3976       5694       6006       3376       9020       4567
## SOST                 6          1          1         17          1          0
## ITGA2B              15         32         26         11         18         15
## EFCAB13             72        115        152        113         80        193
## NPEPPS            2232       2758       2752       2362       3478       2584
## TBX21                1          7          8          0          1         18
## COL1A1           32545      36338      49051      11365      52517      28945
## MRPS23             469        742        834        776        615        873
## MIR21                0          0          3          1          1          0
## RPS6KB1            706        829        875        820       1139       1031
## ACE                263        630        597        189        410        553
## ERN1               718        712        705        521       1062        753
## PRTN3                0          2          1          2          0          2
## PLIN5               38        217        504       2167       1792        116
## RETN                 2          1          2          5          5          5
## CCL25                0          0          3          0          3          1
## ICAM1              223        392        467        149        252        369
## TYK2              1577       2964       4026       1596       2187       2165
## SMARCA4           2562       3670       4054       2169       3800       2862
## ACP5               461        707        722        965        837        757
## JUNB              2991       2811       3293       3566       3475       3221
## CYP4F22           1334       2253       2587       2440       2434       1437
## JAK3                76        205        224         76        133        151
## JUND              3676       3893       5821       5721       5118       7930
## CEBPA             4803       6730       8604       8823       8772       8665
## CEBPG             1638       1832       2119       1937       2525       2465
## ACP7               144        902        738        731        676        376
## ZFP36             1243       2157       3020       1596       2380       2467
## TGFB1              931       1246       1207        936        998       1575
## RPS19            18927      24164      25096      31759      19408      36466
## PSG2                 0          4          0          5          5          5
## FUT2               152        109        164         83        105        115
## NKG7                16         36         54         24         30         72
## ZNF415             133        199        131        221        212        180
## LILRB2              36        106         91         50         31         65
## LILRA5               0          1          4          4          2         12
## KIR3DL1              0          0          0          1          0          1
## KIR3DL2              0          1          0          1          0          0
## SMOX               199        507        390        622        559        502
## BMP2               324        631        981        400        737        848
## GINS1               96        210        274        130        215        158
## PLCG1             1814       3004       3114       1661       2638       2350
## YWHAB             6612      10179      10036       8220       9821       9703
## PI3                 87        173        183         75        825         86
## MMP9                51        199        133         49         40        123
## CD40               282        599        582        377        388        635
## PFDN4              182        385        409        456        309        488
## BMP7               935       1376       1815       1364       1468       1446
## RPS21             7083      10639      12933      14725       6686      17122
## MX1                433        414       1019        311        471        723
## PFKL              3493       6404       7613       5106       5817       5185
## IL17RA             705       1136       1435        554       1220        869
## UBE2L3            1287       2293       2260       2416       2161       2304
## TPST2              506       1036       1134        785        980       1187
## XBP1              2024       3440       3905       4018       3280       3255
## UQCR10             587       1083       1214       1674        812       1192
## SEC14L2            185        301        491        446        357        346
## APOL6              310        620        507        312        558        663
## APOL1              154        291        445        129        187        381
## TYMP               542       1346       1118       1384       1128       1278
## BEND2                0          0          0          0          3          1
## PHEX                12         17         16         19         18         21
## TIMP1              636       1840       1760        919        687       1728
## FOXP3               63        120         97         65         99         56
## MSN               4547       9731       9303       3551       7518       5467
## PGK1              4315       6758       7546       5684       7388       5833
## LAMP2             3316       4845       4790       4029       6396       4760
## CD40LG               7         36         14          9         16         23
## IRAK1             1554       2164       2472       2461       3183       2636
## COX2             87139     105917     111893      94767      92660      95699
##             GSM6222618 GSM6222619 GSM6222620 GSM6222621 GSM6222622 GSM6222623
## TNFRSF9             11          1          5         53        211         36
## ENO1             15908      14256      13408      36557      38081      31920
## PIK3CD             569        526        343        676        722        551
## PGD               1971       1933       2349       4205       5508       5316
## MTHFR             1514       1263       1147       1683       2486       1612
## TNFRSF1B           798        515        343        903       1674       1081
## PINK1             2405       2059       2013       1484       1637       1935
## IFNLR1             540        763        534        512        645        400
## RUNX3             1034       1656        892        898       1487        839
## SH3BGRL3          3735       2908       3246       6438       7082       7030
## CD52               267        116         86        254        249        193
## IFI6               803        436        482       2094      25391       5873
## ZC3H12A            511        660        770       6971      10259       7861
## UTP11              747        660        744       1133        731       1106
## JUN               2396       1439       1904       1703       2383       1755
## KANK4               76         67         43         17         10         17
## EFCAB7             282        249        218        141        128        146
## IL23R                1          0          0          5         13          7
## ADGRL2            1012        852        979       1687       2086       1672
## GBP3               658        215        371       1090       2319        688
## GBP1               638        547        611       1682       6371       1507
## GBP5                99         44         35        233       1364        453
## TGFBR3            2421       2178       2520       1103       1091       1013
## VCAM1              308        154        141        309        564        347
## PTPN22              59         50         27        119        236        222
## CD160               31         29         13         23          8         28
## FCGR1A               9          7          4         18         47         18
## MCL1              7054       6474       8011      11277      12743      12550
## CTSK              9062       4104       3003       2902       3037       5099
## RORC              1233       1345       1030         72         72        127
## S100A9            2131        557        972     182198     240816     231147
## S100A12              5          1          1        782        903       1007
## S100A8            2026        936       1360     133147     160725     156027
## IL6R               740        947        538        867        817        970
## RIT1               892        714        744       1046       1189       1194
## BGLAP               44         46         17         10         16         12
## IFI16             3161       2606       2797       8556      12842      10647
## AIM2                11          6         10         28        133         32
## CRP                  0          1          0          0          0          0
## FCGR2A             295        102         93        284        420        270
## HSPA6              351        168        232        324        436        347
## FCGR3A             104         76         23        204        683        355
## SELL               101         21         15        198        230        226
## GLUL              8873       7531       7712       8341      16416      14370
## PTGS2               96        145        164        151        220        316
## CRB1                 1          4          2          1          3          1
## KDM5B             2642       3113       3249       3600       4704       4102
## IL10                 1          0          0          6         23         12
## YOD1              3077       1770       2066       2406       2437       3691
## HHAT               269        288        323        189        235        278
## TRAF5              589        537        308        219        300        245
## NLRP3               86         21         10         61        119         93
## LINC01250            2          7          1          1          0          0
## RPS7             18462      12970      13765       9710       9744      10279
## RSAD2              107         53         82        259       3922        821
## FOSL2             9297       9488      10163       9199      12593       9754
## REL               1167       1795       1782       2207       3316       2852
## TGFA              1047        868        867       1609       2873       1507
## DYSF               412        276        332        507        521        380
## HK2                935       1719       1517       4481       6529       5212
## CD8A                87         89         48        225        516        274
## CD8B                42         47         21         60        123         53
## EIF5B             5215       5494       5358       7194       7776       6352
## IL1R1             2949       3086       2627       3107       4137       3685
## RGPD6              737        970        698        378        454        436
## IL1A                41         16         23         28         32         24
## IL1B                 5          7          7         59        299        419
## IL37              1805       1253       1199         52         86        128
## IL36RN            3340       2103       2188      13994      20314      10440
## IL1F10             171        124         94         56         86         83
## IL1RN             2871       2122       2614       4107       8423       5998
## NMI                519        335        437       1010       1654       1070
## TNFAIP6            192         84         66         33        119         59
## IFIH1              519        492        468       1092       3987       1756
## SCN1A                5          6          2          4          1          2
## ABCB11              63         95         67         40         60         29
## RBM45              250        208        236        251        296        284
## FRZB              1173       1069        526        191        128        464
## TFPI               524        410        371        373        389        558
## STAT1             2167       2281       2445       7494      24127      10245
## NABP1              394        253        206        443        843        453
## SF3B1            10250      12270      10859       7779       9045       7830
## CASP10             575        447        312        828       1275        834
## CD28                59         29         18        173        177        104
## CTLA4               26         10          1         70        213         64
## ICOS                24          4          4         67        198         62
## NDUFS1            2301       3227       2635       3786       4051       3780
## CXCR2              112        108        141       1023       1409       1257
## IRS1               841        537        747        626        432        450
## CCL20                4          0          7         62        114        118
## ATG16L1           1032       1207       1138        861        935        721
## GPR35               89         82         40         18         29         27
## PDCD1               17         10          7         22        160         56
## PPARG              292        106         69         87         40        105
## RPL15            31603      25154      26335      18923      19019      20574
## EOMES               11          9          4         19         47         46
## CX3CR1             120        145         29        157        158        165
## ACKR2               63         25         32        858        638        504
## CCR2               112         78         49        277        174        258
## CCRL2               20          6          3         20         48         36
## TLR9                21         31         12         16         11          8
## ARHGEF3            970        990        982       1205       1358       1389
## ADAMTS9            492        243        307        232        111        168
## TMEM45A          16019      14977      19505      29543      32425      38671
## CD80                 2          6          1         19         88         22
## CD86               127         96         74        166        373        154
## MIX23              276        253        246        283        285        292
## PLS1               185        161        208        254        388        343
## PTX3                47         37         34         35         23         41
## GOLIM4            1406       1107       1129       1378       1322       1303
## MYNN               721        670        830        563        575        541
## TNFSF10           1825       1816       1254       3669       4719       4220
## ADIPOQ            1808         80        134        263          1        234
## SPON2             2243       1357        847        388        250        429
## S100P              276        280        198        265        354        716
## WDR1              6450       6279       6295      10169      11290       8320
## CD38                11          8          5         55        109        100
## PPARGC1A           509        767        529         56         18        138
## TMPRSS11B            1          0          0          0          0          0
## CSN3                 0          0          0          0          0          0
## ALB                 19          8          1          0          1          0
## CXCL8                3          3          0        144       1414       1595
## CXCL2                7          1          4         21        148         45
## AREG                90        107        121        206        269        242
## CXCL10              24          3         13         98       1125        194
## CXCL13               2          3          3         27        581        109
## SPP1                18          7          0         48         32         49
## HERC6              318        310        306        963       9593       2782
## NFKB1             1995       2063       2232       3816       4439       3315
## SEC24B            1560       1735       1644       1627       1776       1436
## EGF                135        119        127          7         17         13
## IL2                  1          1          0          0          0          3
## IL21                 1          0          0          3         11          7
## IL21-AS1             0          0          0          1          7          2
## SLC7A11            117         96        146       1030       1129        814
## IL15               129         42         53         62        149         59
## EDNRA              573        406        458        388        564        424
## TLR2               242        249        116        361        499        349
## FGB                  0          0          0          0          0          0
## DDX60              892        924        966       1320       6882       2249
## SPCS3             2358       2328       2541       3557       3972       3627
## TLR3               169        166        158        233        355        274
## OSMR               978       1244        922       2422       3002       2369
## GZMK                36         17         44         24        143         78
## GZMA                74         18         27         81        211        114
## ANKRD55              4          2          1          2          1          5
## CENPK              224        114        180        364        295        237
## CAST             11293      11287      12544      10793      12342      11659
## ERAP1             2470       3035       3594       2874       3310       3028
## ERAP2             1620        204       2074       1552       2668        175
## TNFAIP8           1311        902       1022       1363       1834       1517
## CSF2                 1          0          0          0         13          2
## IL5                  0          1          7          3          0          0
## IL13                 0         11          0          3          7          0
## IL4                  5          4          2          1          0          0
## CD14               859        364        215        550        799        761
## CSNK1A1          10265      10637      13275      20400      23887      18325
## PPARGC1B           410        597        356       1304       1206       1217
## TNIP1             3418       2647       2523       3893       6816       4737
## ATOX1              480        402        455        758        912        790
## FAXDC2            1585       1548        990       1375       1435       1730
## IL12B                2          1          0         20         58          7
## MIR146A              3          1          1          2          3          4
## PDLIM7            1447       1467        962        908        920        762
## SERPINB1          1169        940        834       1570       3210       2929
## SSR1              3286       3304       3542       5064       6006       4913
## CD83               208         94        153        300       1085        420
## SOX4              1943       2030       2288       1159       1053       1739
## CMAHP             1283        737        980        106        128        122
## HLA-A            16592      23274      16328      19518      48375      22045
## HLA-C            22859      13422      17591      18222      36790      21969
## HLA-B            25610      33946      23037      34570      96511      40482
## MICA               544        492        212        298        347        267
## LTA                 12         29         15         22         25         29
## TNF                 38         55         51         43        115         65
## HLA-DRB1          6611       2362       3420       4844       6808       3947
## HLA-DQB1          2918        996       1574       1162       2148        995
## PPARD             1998       1822       1877       4462       5402       4379
## CCND3             1151       1138        962       1519       1732       1199
## VEGFA             1359       1406        739       1473       2117       1779
## RUNX2              227        157        128        190        241        153
## IL17A                0          0          0         26         60         35
## IL17F                0          0          0         11          6         14
## PRDM1             1328        860       1022       3004       4694       3431
## ATG5               801        656        703        624        787        694
## TRAF3IP2          1609       1477       1426       1984       1844       1710
## NCOA7              825        685        747       1541       2073       1409
## SGK1              2734       1713       1864       3967       6857       6139
## IFNGR1            2663       2473       2422       4357       6051       4935
## TNFAIP3           1048        793       1488        753       1363        828
## SOD2              5082       4074       3821      11711      22326      16256
## LPAL2               16         12         13          5         10          4
## PLG                 11          2          3          4          8          6
## CCR6               116        152        105        133        180        140
## RAC1              8947       8936       9599      11762      13918      13048
## ZNF316            3221       2891       2396       1589       1670       1505
## AHR               3018       3049       3391       4243       4641       4641
## IL6                  3          1          2          5         17          8
## TOMM7             4715       3785       3312       2225       2661       2575
## CYCS              2613       2218       2509       6387       6386       5961
## AQP1             13444       9717       7269       5853       6331       8879
## NT5C3A            1077        819        903       1593       3142       2230
## EGFR              8295      12481      12131       9249       9211       8092
## CD36              2704       1162       1368       4332       6600       6466
## SAMD9              303        219        171       1716       6534       4375
## SERPINE1           236        171        126        158        293         85
## CUX1              2320       2433       2659       3365       3985       3265
## PSMC2             2709       2514       2922       3176       3625       2994
## NAMPT             1670       2039       2337       7172      12050       9107
## HYAL4               36         33         50        325        643        564
## LEP                340         27         55        164         11         16
## CALD1             7069       6708       4903       5905       4494       4918
## BRAF              1333       1371       1365        998       1162       1074
## EZH2               929        992        809       1411       1455       1345
## DNAJB6            4349       3835       4467       7432       9678       6928
## CSMD1               30         97         52         60          2          9
## CTSB             11764      10086       7699      23433      28087      32698
## EGR3              2186       2182       2560       2243       2161       2028
## TNFRSF10A          204        232        212        441        485        424
## BNIP3L            4705       4281       4472       3504       4316       5250
## DUSP4              494        442        384        571        916        336
## NRG1               296        383        793        335        420        293
## RPL7             31484      25727      28959      20643      17849      21949
## IL7                276        218        277        143        189        195
## GEM                383        321        278        246        253        216
## MYC               2431       1456       2551       3925       3294       3239
## GPT                628        634        616        325        281        257
## JAK2               822       1164       1113        920       1752       1130
## CD274               43         23         14        326        803        300
## IL33              1565        997       1162       2495       1214       2100
## LURAP1L-AS1         12          4         13          0          6          6
## LURAP1L            564        416        516        477        511        483
## TTC39B            1372       1543       1530       2754       3467       2792
## IFNA1                0          0          0          0          0          1
## TEK                393        317        244        219        226        296
## TRBV20OR9-2          0          0          0          1          1          0
## TOMM5              752        654        781        986       1036        928
## ANXA1             9438       6556       7729       9877      14608       8584
## ERP44             1308       1210       1370       1687       2032       1673
## ZNF483             270        379        187         99        122        145
## TNFSF15             34         21         12        106         98         57
## TLR4               441        271        222        274        291        295
## PTGS1             9857       9535      10465       7545       8066       6439
## HSPA5             6073       6564       7440      14120      18520      10919
## FNBP1             2754       2117       2267       2816       3592       2676
## CARD9              154        261        168        101        237        159
## TRAF2              465        410        312        582        772        540
## CLIC3             1425       1229       1331       2174       2673       2160
## IL2RA               29         12          3        108        212         78
## GATA3             8145       7475       8624       2987       3063       2623
## VIM              39274      23045      17986      18566      20919      20086
## CREM               338        268        234        223        311        254
## DKK1                24         29         23          8          2          4
## MBL2                 0          0          0          0          0          0
## SAR1A             2975       2672       2559       2377       2642       2602
## PRF1                41         27         17         95        296        140
## ZMIZ1             3574       3886       4032       4757       5034       3824
## IFIT3              446        317        338        847       6690       1682
## TALDO1            2997       2344       2689       4723       5478       5098
## IGF2               379        630        261        318        490        371
## INS-IGF2           360        558        236        284        442        344
## IGF2-AS              5          2          3          0          0          0
## INS                  0          0          0          0          0          0
## STIM1             2866       3431       3532       2774       3368       2664
## TRIM22            1137        962        678       1354       8404       2980
## PTH                  0          0          0          0          0          0
## SAA1                20         21         27        198        531        731
## SLC1A2              77        132         89         40         54         31
## FOSL1               36         25         81        344        396        257
## CCND1             9082      10633      10557       5388       3991       4013
## JRKL               380        411        245        414        375        333
## MMP7               729        819        223        144         16        216
## MMP1                73        117         22          8        458         50
## MMP3                21         15          7          0         32         28
## CASP1             1357        915       1017       1491       2292       1479
## DLAT               887        974       1084       1578       1686       1308
## IL18              3556       2544       2735       1985       1996       1827
## CD3E               122         81         49        423        549        345
## MCAM              2337       2294       1643       2489       1990       2143
## WNK1              7027      10941      11076       9562      10100       7525
## TNFRSF1A          3438       3577       3607       4991       6088       5199
## GAPDH            33604      27190      23246      43955      58803      55806
## CD4                894        508        549        950       1261        998
## SLC2A3             241        208        227        301        605        450
## CLEC4D               0          0          0          2          7          2
## KLRB1               17          6          5         62         33        116
## CD69                41         18         21         62         85         62
## CLEC2B            2157       1630       1637        727        973       1768
## OLR1                 7          3          0          6         39         12
## ABCD2               97         14         12         51         39         23
## LRRK2              529        537        701        444        427        552
## VDR               1594       2340       1945       2399       2536       2199
## TMBIM6           13796      13202      13006      17712      18014      18126
## PFDN5             5185       3872       3721       2333       2512       2603
## SP7                  0          0          4          1          0          1
## MUCL1             6885       7520        996        331        410        894
## CD63             11121       6977       6339       4954       6328       5779
## SUOX              3244       3269       3208       2803       3371       2985
## RPS26             2164        613       1665       2057       2918       2222
## RPL41            11011       8564       8684       7134       7882       7874
## IL23A               11         21         28         22         67         29
## DDIT3              368        337        240        172        209        229
## IFNG                 0          0          2          5         39         15
## IL22                 0          0          0          7          3          0
## LYZ               2155        686        490       4797       7244       3966
## CAPS2               90        137        102         40         28         24
## SYT1                42         25         39         14         13          7
## BTG1              8856       8568       8802       6557       9679       8673
## HCAR3              491        243        418        795       1446        969
## GJB2              3249       4965       3678      64488      90408      92711
## GJB6              2653       4570       2622      18982      20610      28537
## TNFSF11              9         19          5          9          9         12
## TPT1             76653      62481      62318      56006      56410      63884
## LMO7              1971       2271       2514       2824       4868       4144
## TNFSF13B           221        100         44        155        273        203
## LAMP1             8786       9634       9737      11985      12960      12388
## PSME2             1460       1149       1109       2302       4048       2118
## GZMB                10          6          6        125        443        104
## RPL36AL           2938       2678       2647       3276       3624       2868
## PYGL              1611       1651       1513       5572       5797       4008
## HIF1A             2151       2592       2240       5814       6482       5620
## FOS               6468       2826       3537       1165        387        493
## JDP2              1301        989       1189        655        903        847
## NOXRED1             66         54         35         35         27         38
## SERPINA1            80        225         40        374        460        423
## CDC42BPB          4093       4401       4673       5422       6042       4846
## PLA2G4D            664        824        627      10122      18713      11060
## TRIM69              11         25         20         27         40         23
## SLC51B              26         29         16         11         16         14
## SMAD3             2270       1694       1586       1476       2027       1815
## CYP1A1              39         57         33          2          1          0
## AKAP13            4103       4249       4813       3589       3826       3337
## MEFV                21          3          8         44         77        121
## SOCS1              121         72         43        183        502        219
## ATXN2L            3425       3101       2721       2922       3250       2644
## CD19                10          3          5         11          7          4
## ITGAL              197         76         48        320        484        328
## ITGAM              369        120        111        326        423        291
## ITGAX              298         90         65        162        456        307
## DNAJA2            2394       2403       2575       2884       3407       3145
## SIAH1             1321       1380       1045        722        941        838
## ADCY7             1958       1396       1448       1812       2443       1871
## NOD2               674        736        785       2342       2652       2273
## CMTM2                2          8          0          0          1          3
## SF3B3             3484       3348       3766       5778       5726       5310
## PSMD7             2562       2628       2805       3500       3934       3139
## WWOX               188        300        291        356        556        349
## MAF               8936      10259       9266       6527       9465       6829
## SLC7A5            1163       1536       1745      10386       9383       6721
## CXCL16             958        923        862       1733       2644       1511
## XAF1              1231        729        490       1294       6812       2266
## CD68              1143        443        364       1134       2000       1500
## PER1              7287       2847       4421        969       1948        813
## NOS2                 5         24          2        413       1995        314
## TRAF4              870        771        568        489        555        433
## CCL2               338        128        158        641        649        569
## CCL5               106         79         55        187        681        235
## CCL3                 1          0          0         10        144         47
## CCR7                70         23         13        236        463        170
## STAT3             5826       6901       6898      16942      23378      16687
## SOST                19         33          6         80        107         19
## ITGA2B              39         15         13         25         13         11
## EFCAB13            194        162         97         48         40         60
## NPEPPS            3710       3857       3652       4404       4966       4116
## TBX21                9         13          3         10         46         31
## COL1A1           72107      58198      49649      30620      34531      67618
## MRPS23             756        669        606        739        763        673
## MIR21                1          1          1          1          3          1
## RPS6KB1           1093       1358       1441       1446       1648       1483
## ACE                707        624        506        445        608        438
## ERN1               930       1240        985       1015       1314       1117
## PRTN3                0          0          0          0          0          0
## PLIN5              187        252        101        530       1117       1024
## RETN                 3          0          0          0          0          0
## CCL25                3          1          1          1          0          0
## ICAM1              537        412        323        516       1222        798
## TYK2              3067       3097       2477       1875       2234       1870
## SMARCA4           3232       4023       3971       5077       5024       4819
## ACP5               766        799        782       1831       2852       3038
## JUNB              3139       3286       2748       8267       9530       7686
## CYP4F22           1931       1886       2153       6544       6521       6486
## JAK3               223        145         82        331        442        270
## JUND              5193       4503       6325       3244       3525       3206
## CEBPA             6974       5750      10927       9597      11672       9198
## CEBPG             2460       2299       2871       2731       2974       2485
## ACP7               624        338        203       4366       6627       4074
## ZFP36             2878       1458       1574       3463       3898       2893
## TGFB1             1679       1214       1050       1853       2311       1953
## RPS19            25550      19038      22501      14779      17825      16667
## PSG2                 0          0          4          4          7          0
## FUT2               239        217         79        514       1226       1450
## NKG7                53         16         22         38        156         62
## ZNF415             248        195        136         56         19         58
## LILRB2             129         37         11        112        322        147
## LILRA5               6          3          0          6         43         12
## KIR3DL1              0          0          1          0          3          0
## KIR3DL2              0          3          0          1          3          3
## SMOX               489        380        297       1487       1692       1927
## BMP2               633        916       1400        374        699        325
## GINS1              208        227        212        579        660        547
## PLCG1             3279       3506       2809       3339       3544       3006
## YWHAB             9896       8992      10191      12568      13840      12313
## PI3                173         82         97      75302     127198     125561
## MMP9               277         69         68        355       1717        452
## CD40               621        444        392        308        675        391
## PFDN4              405        270        326        298        268        314
## BMP7              1762       2164       2297       1990       1917       2006
## RPS21             9258       7206       8348       6510       6578       7711
## MX1                769        530        416       2436      16568       5935
## PFKL              5747       5097       4994       5395       5662       4705
## IL17RA            1190       1247       1069       1367       1298       1242
## UBE2L3            2231       1708       1952       2827       3108       2735
## TPST2             1041        681        984        889       1095        757
## XBP1              4132       3210       2197       4298       5594       4171
## UQCR10             876        676        736        986       1117       1122
## SEC14L2            485        724        600        363        597        898
## APOL6              796        583        514       2866       8482       3429
## APOL1              374        248        217       2230       9910       4964
## TYMP              1264        958        885       9497      30950       7858
## BEND2                0          0          1          1          0          0
## PHEX                21         27         11         47         45         53
## TIMP1             1875        766        659        903        799       1264
## FOXP3               89         85         70        173        367        166
## MSN               8062       7047       7402      13422      14911      11752
## PGK1              6936       6988       7092      13078      14341      15024
## LAMP2             5302       6312       6501       7227       8785       7985
## CD40LG              16         21          2         57         29         48
## IRAK1             3171       2893       2954       5946       6667       5148
## COX2            100671     104963      86881     113125     104720     158408
##             GSM6222624 GSM6222625 GSM6222626 GSM6222627 GSM6222628 GSM6222629
## TNFRSF9             62         32         60         53         38         84
## ENO1             21148      28181      32621      19056      26046      40368
## PIK3CD             401        612        572        624        712        871
## PGD               3722       5226       4342       4022       2532       5567
## MTHFR             1407       2186       1893       2652       1813       1940
## TNFRSF1B           784        876        836        858        909       2048
## PINK1             1348       1624       2274       2002       1978       1591
## IFNLR1             343        473        498        456        493        469
## RUNX3              845        713       1068        782        853       1384
## SH3BGRL3          4702       5224       5789       4207       6394       6713
## CD52               145        133        179        159        323        405
## IFI6             12766      18464      14861       7094       1772       4668
## ZC3H12A           8663       9789      10150       6890       5095      12994
## UTP11              903       1112       1172        780        943       1334
## JUN               1189       1680       2112       1527       3632       1696
## KANK4                3         14          2         21         16          2
## EFCAB7              84        163        175        193        128        132
## IL23R                4          6          5          2          0         12
## ADGRL2            1154       1628       1383       1127       1126       2070
## GBP3               967       1279       1338       1232        647        700
## GBP1              1791       2320       2162       1762       2174       2599
## GBP5               226        388        278        423        175       1119
## TGFBR3             870       1162       1493        919       1214       1639
## VCAM1               99        187        109        252        414        778
## PTPN22             113        377         74        260         87        390
## CD160               33         19         11         25         11          7
## FCGR1A              15         19          8         20          6        140
## MCL1             10526      10548      10758       8733      12113      12414
## CTSK              2015       3475       2329       4668       3827      10410
## RORC                70        118        146        439        162        215
## S100A9          169007     262610     224978     159909     116870     257930
## S100A12            843       1212        852        687        385       1639
## S100A8          101757     177411     142933     113632      80752     162953
## IL6R               825        861        765        900        796        783
## RIT1               908       1235       1300       1324       1231       1197
## BGLAP               20         21         11         13         16         11
## IFI16             6352      10484      10107       7957       7179      12821
## AIM2                29         74         36         50         28        136
## CRP                  0          2          0          0          3          0
## FCGR2A             199        202        146        193        275        827
## HSPA6              455        316        448        343        508        220
## FCGR3A             272        216        331        330        309        476
## SELL                72        131         81        101        263        503
## GLUL             17711      14372      21782      13591      10212      13880
## PTGS2               37        196         77        155        101        456
## CRB1                 1          0          1          2          2          0
## KDM5B             3207       4208       5852       4413       3515       4406
## IL10                 5          3          9          9         22         16
## YOD1              4886       2563       2385       1864       2151       1965
## HHAT               147        287        283        366        128        190
## TRAF5              246        330        141        432        282        295
## NLRP3               44         46         27         59         79        254
## LINC01250            1          2          0          0          1          0
## RPS7              7693      11415      11084      10304       8491      12496
## RSAD2              956       1427        951        484        169       1389
## FOSL2             8392      10079      12260      10248      10953      14224
## REL               2541       2991       3775       3326       2697       3375
## TGFA              1712       1612       1785       1250       1267       2495
## DYSF               180        522        345        273        447        751
## HK2               3692       5411       4299       4094       3107       8120
## CD8A               227        277        238        252        164        357
## CD8B                64         52         51         52         68        103
## EIF5B             4929       6277       8435       4692       5345       7329
## IL1R1             2014       3739       3708       3982       3219       8063
## RGPD6              468        621        610        596        563        483
## IL1A                 4         54         36         21          9        114
## IL1B                35        374         97        198         36       1435
## IL37               318        103        124        219        179         20
## IL36RN           11716      10019      11804       6747      10384      11588
## IL1F10              76         61         70         65         62         56
## IL1RN             4839       4972       5097       3256       2921       6840
## NMI                794       1220       1348        804       1067       1100
## TNFAIP6             32         37         36         29       1853        531
## IFIH1             1569       2220       2189       1436        720       1372
## SCN1A                2         10          1          5          0          3
## ABCB11              55         99         76         75         24         13
## RBM45              195        289        248        240        254        266
## FRZB               180        577        500        563        269        595
## TFPI               278        590        391        441        617        801
## STAT1            10173      13884      13669      12573       6123      10686
## NABP1              365        750        396        661        842        744
## SF3B1             6976      10404       9046      10811       9064      10007
## CASP10             940       1010        870        947        750        695
## CD28                46         93         75         93        119        212
## CTLA4              115         79         73         81         32        101
## ICOS                58         54         52         46         84        131
## NDUFS1            2415       3769       3756       3164       2824       4139
## CXCR2              627        720       1289        766        189        814
## IRS1               460        447        414        323        599       1077
## CCL20              211         87        199         78         70         96
## ATG16L1            875        826        853        812        892        849
## GPR35               38         42         21         66         29         51
## PDCD1               59         58         53         41         36         70
## PPARG               39        103         55         73        145        303
## RPL15            15227      20333      22946      20683      18923      25572
## EOMES               36         33         15         35         15         41
## CX3CR1              50        125        139        195         95        195
## ACKR2              340        377        254        236        533        354
## CCR2                86        158        114        177        285        256
## CCRL2               21         42         17         30         28         57
## TLR9                 9         17         14         13         23         41
## ARHGEF3            886       1410       1241       1492       1146       1538
## ADAMTS9             90        149         85        139        274        447
## TMEM45A          28079      30043      35104      30705      24915      25514
## CD80                25         13         17         20         17         15
## CD86               111        114         98        134        222        232
## MIX23              235        351        298        243        236        270
## PLS1               165        245        284        173        183        402
## PTX3                13         61         22         53         22        250
## GOLIM4             683       1022       1258        905       1090       1699
## MYNN               510        718        680        504        578        590
## TNFSF10           2252       4271       5166       4991       4805       5400
## ADIPOQ              36        147         74        171        239        327
## SPON2              354        646        700        646        549        500
## S100P              458        794        310        791        177        984
## WDR1              7819       8296       9335       7415       8435      13342
## CD38                27         62         34         65         33         84
## PPARGC1A            41        122        165         78         73        184
## TMPRSS11B            0          0          1          0          0          0
## CSN3                 0          0          0          0          0          0
## ALB                  3          3          1          6          0          0
## CXCL8              128       1988        372       1530        131       7255
## CXCL2               30         52         23         50         24        232
## AREG               128        246        150        123        108        310
## CXCL10              96         89        156        165        267        173
## CXCL13             172         59        130         30         13        979
## SPP1                17         21         24         29          4         32
## HERC6             4102       5997       4639       3073       1023       3067
## NFKB1             2582       3336       4292       2938       3488       3853
## SEC24B            1217       1416       1542       1524       1400       1649
## EGF                  8         13         28         48         15          7
## IL2                  0          3          0          0          0          0
## IL21                 2          4          4          2          4          2
## IL21-AS1             4          5          2          1          1          1
## SLC7A11            636        843        605        489        553       1044
## IL15                73         75         70        113        115        155
## EDNRA              135        472        310        473        425       1223
## TLR2               342        526        502        689        410        732
## FGB                  1          0          0          0          0          0
## DDX60             3455       3741       4402       2544       1546       1650
## SPCS3             2658       3172       3437       2511       3466       4394
## TLR3               139        298        286        235        226        186
## OSMR              1590       2327       2264       1914       2838       5108
## GZMK                49         37         30         35          8        163
## GZMA                42         93         69         84         61        269
## ANKRD55              7          5          2          3          4          3
## CENPK              109        232        141        220        178        192
## CAST              9717       9565      12847      10348       9650      12313
## ERAP1             1630       2959       3172       3262       3149       2884
## ERAP2              214       3083        204        349       1856        245
## TNFAIP8            889       1212       1426       1479       1336       1549
## CSF2                 3          6          2          2          4          4
## IL5                  1          0          1          1          3          0
## IL13                 1          3          0          2         76          2
## IL4                  2          1          1          2          1          0
## CD14               424        665        414        499        544       2260
## CSNK1A1          17042      21748      22142      16004      18356      19776
## PPARGC1B          1363       1374       1459       1290        988       1416
## TNIP1             4745       4426       5005       4785       3770       7212
## ATOX1              701        840        769        727        711        854
## FAXDC2            1393       1596       1714       2165       1252       1245
## IL12B                9          7         15         18         12         16
## MIR146A              4          6          4          4          2          4
## PDLIM7             982       1228       1026       1429       1006       1256
## SERPINB1          1649       4210       3067       1428       1301       8119
## SSR1              3399       4741       4760       3600       3912       6527
## CD83               377        281        347        301        289        422
## SOX4               539       2275       1557       2489        863       2724
## CMAHP               80        185        118        201        324        233
## HLA-A            24238      24702      34078      27347      29258      29451
## HLA-C            26548      22129      24915      33140      23389      23826
## HLA-B            51917      41146      58790      53369      43262      39922
## MICA               301        423        331        382        312        182
## LTA                  6         19         17         26         23         18
## TNF                 56         66         97         65         71        134
## HLA-DRB1          5267       3736       4019       4439       8778       9805
## HLA-DQB1          3816       1852       1733        646       4543       4141
## PPARD             5794       3997       4254       3487       4443       4432
## CCND3             1259       1427       1702       1037       1191       1619
## VEGFA             1518       1354       1622       1408       2716       1544
## RUNX2               97        231        145        260        131        355
## IL17A               15         50         50         40          9         21
## IL17F               11         28         59          9          2         17
## PRDM1             3026       2489       3429       2281       2433       3730
## ATG5               449        690        750        652        643        879
## TRAF3IP2          1580       1710       1862       1595       1688       2389
## NCOA7             1178       1793       1748       1466       1458       4636
## SGK1              4777       6391       8597       7474       6426       7350
## IFNGR1            2762       5322       4017       4252       3971       8366
## TNFAIP3            887        758        698        900        912       1463
## SOD2              9460      13191      20169      10320      10115      45179
## LPAL2               20         21         15         13          4          6
## PLG                  3          5          6          3          4          2
## CCR6                93         72        116        105        115        207
## RAC1              9489      11469      14547      10689       9978      14953
## ZNF316            1999       2631       2309       2906       1913       1748
## AHR               1994       4391       3268       3786       4964       7181
## IL6                  2          6          7          9          8        124
## TOMM7             1998       2854       2860       2510       2337       3019
## CYCS              4211       6151       5966       3143       3950       7547
## AQP1              3822       7049       6290       8181       6447      12500
## NT5C3A            2724       2549       2801       1886       1724       2281
## EGFR              6060       8511      10146       8771       8036      10468
## CD36              7465       8457       7192       8671       7111       3706
## SAMD9             2662       3748       2715       2167        722       4127
## SERPINE1           277        138        128         96        205        602
## CUX1              2584       3129       3828       2508       3580       3884
## PSMC2             2570       2823       3118       2278       2787       3042
## NAMPT             6369      10963      11271       5863       5128      28090
## HYAL4              426        523        448        495        144        613
## LEP                 10         22         19         56         41          9
## CALD1             2912       4839       4094       5629       3759       7333
## BRAF               975       1238       1118       1300        990       1012
## EZH2              1032       1612       1052       1412       1037       1049
## DNAJB6            6748       6963       7901       5279       6395       8040
## CSMD1               20         11          2         46          7         16
## CTSB             14316      27087      25751      17348      20450      40774
## EGR3              3152       2280       2507       1879       2923       1535
## TNFRSF10A          473        514        418        272        524        514
## BNIP3L            4675       4056       4890       5460       4570       5504
## DUSP4              464        382        496        324        311        919
## NRG1               552        375        341        153        502        785
## RPL7             14455      18100      21974      21033      18181      24441
## IL7                 97        191        233        215        196        230
## GEM                143        204        171        261        107        496
## MYC               3759       2949       4208       3192       3478       3498
## GPT                392        315        438        723        245        209
## JAK2               749       1071       1263       1207       1348       1068
## CD274              262        594        374        258        444       1441
## IL33               922       3634       2656       1919       2498       1721
## LURAP1L-AS1          8          5          1         11          4          1
## LURAP1L            284        491        530        472       1433        543
## TTC39B            2254       2945       2590       2859       2481       2680
## IFNA1                2          0          0          0          1          0
## TEK                149        383        199        285        255        469
## TRBV20OR9-2          1          0          2          0          0          1
## TOMM5              823       1058       1068        643        786       1037
## ANXA1             5004       7430       5459       3628       4903      26907
## ERP44             1422       1560       1832       1383       1431       1935
## ZNF483              93        135        100        182        128        114
## TNFSF15             21         47         42         66         49        107
## TLR4               196        200        148        274        330        572
## PTGS1             5618       6986       6869      14000       8891       5928
## HSPA5            13311      11727      14826       7977      11006      16867
## FNBP1             2204       2517       2609       3121       2551       3508
## CARD9              203        198        189        315        287        163
## TRAF2              471        562        653        669        818        485
## CLIC3             1811       1846       2325       1823       1908       1812
## IL2RA               77         57         69         61         71        227
## GATA3             2197       2665       3674       4991       4537       1868
## VIM              15657      14784      18380      17817      23131      23102
## CREM               177        327        277        274        276        459
## DKK1                 8          2          7          3          3         10
## MBL2                 0          0          1          0          0          0
## SAR1A             1967       2527       2629       2274       2563       3345
## PRF1               102         80         81         95         73        171
## ZMIZ1             3050       3693       4150       4388       3336       4794
## IFIT3             1630       3805       3064       2225        946       2608
## TALDO1            3908       4461       4785       3585       3395       5329
## IGF2               307        411        289        263        276        667
## INS-IGF2           276        370        255        237        242        597
## IGF2-AS              0          2          1          1          1          1
## INS                  0          0          0          0          0          0
## STIM1             2052       3057       3222       2253       3208       3452
## TRIM22            4107       6612       4484       4814       1698       3904
## PTH                  0          0          0          0          0          0
## SAA1               278        875        179        230        279       1530
## SLC1A2              23         23         19         27         41         22
## FOSL1              920        270        177         71        290        360
## CCND1             3958       4064       6421       3409       5253       2475
## JRKL               146        346        323        304        465        285
## MMP7                43        170        204         63        168        331
## MMP1                 8         42         23         51         76       1091
## MMP3                 0         40         28         26         96        353
## CASP1             1089       1944       1637       1617       1050       1587
## DLAT               988       1210       1404        913       1141       1905
## IL18              1738       2000       2225       2189       2510       1111
## CD3E               271        293        225        246        271        490
## MCAM              1376       2335       1780       2329       1628       5723
## WNK1              6524       7724       9866       8366      10115       8456
## TNFRSF1A          4236       5196       6213       4623       5265       6338
## GAPDH            31031      39542      47957      34264      36985      72106
## CD4                587        617        545        778       1249       1309
## SLC2A3             202        402        310        468        438       1307
## CLEC4D               0          5          4          5          2         46
## KLRB1               71         44         37         55         44        146
## CD69                53         59         38         82         49        219
## CLEC2B             943       1439       1358       2223       2113       1592
## OLR1                 4         44          4         26          4         32
## ABCD2               15         22         14         38         43         47
## LRRK2              230        618        372        597        363        588
## VDR               1910       2439       2459       2445       2023       2957
## TMBIM6           11668      15244      17521      13581      13453      20381
## PFDN5             1834       2726       2696       2625       2448       2881
## SP7                  0          0          0          4          0          0
## MUCL1              716       2284       1374       1790        676       1698
## CD63              3644       4849       5487       5383       7254      10286
## SUOX              2415       2794       3207       2833       2641       2332
## RPS26              975       2038       2213       1784       1494       2330
## RPL41             5909       7372       8164       6751       5916       7464
## IL23A               34         43         38         44         16         57
## DDIT3              216        263        236        303        201        248
## IFNG                 6         10         18         20         18         13
## IL22                 3          6          6         14          1         13
## LYZ               2620       2757       1103       2205       2202       6504
## CAPS2               31         42         46         49         25         49
## SYT1                13          4         13         25          8         21
## BTG1              4201       8771      11661      13360       6598      13145
## HCAR3              671        655        811        880       1084       1200
## GJB2             44112      73982      66903      40904      38785     115153
## GJB6             13818      22575      14539      14468      11576      27714
## TNFSF11              7         11         11         10         53         16
## TPT1             44416      59149      65525      58654      62357      77825
## LMO7              4358       3229       3598       4445       2322       2645
## TNFSF13B            96        147         72        160        151        328
## LAMP1             8373      11542      11048      10824       9722      12841
## PSME2             2225       2627       2644       1958       1885       2501
## GZMB               151        121        111        132        113        371
## RPL36AL           2518       3216       3512       2756       2620       4050
## PYGL              3178       2797       5540       3477       4020       3209
## HIF1A             3127       5430       4524       4073       3978      13427
## FOS                861       1289        468        455       1189        672
## JDP2               881        944       1110        758        943        697
## NOXRED1             31         68         46         59         39         22
## SERPINA1           192        452        291        348        178       1198
## CDC42BPB          4083       5456       5569       5262       4882       6007
## PLA2G4D          14648      14925      14105       8902      10599       9736
## TRIM69              35         41         16         57         19         18
## SLC51B              11         16         18         21         15         21
## SMAD3             1677       1750       2517       2302       1277       2037
## CYP1A1              15          3          0          2          2         84
## AKAP13            3153       3198       2938       3877       3775       3888
## MEFV                53        107         64         65         27        308
## SOCS1              181        272        223        158        321        510
## ATXN2L            2849       3572       3060       3345       2667       3239
## CD19                 4          4          4         10          5         51
## ITGAL              259        325        186        332        342        549
## ITGAM              245        300        156        629        566        239
## ITGAX              195        421        101        472        303        615
## DNAJA2            2626       3149       3425       2599       2858       3130
## SIAH1              788        924       1011       1044        755        948
## ADCY7             1647       1749       1678       1763       2328       1727
## NOD2              2603       3167       2537       2691       2273       2016
## CMTM2                0          7          1         10          4         11
## SF3B3             4005       6099       6677       5465       5079       5567
## PSMD7             2722       3383       3750       2769       2908       3951
## WWOX               540        781        399        503        441        170
## MAF               4458       7962      10411      10125      11648       4906
## SLC7A5            7146      12051       8086       4718       3943      12272
## CXCL16            1480       1539       2130       1417       1405       2619
## XAF1              5774       7118       4650       4218       1471       1643
## CD68               858        883        711        863       1552       1926
## PER1              2704       2606       3151        771       1064        958
## NOS2               635        366        583        229         74       1417
## TRAF4              736       1010        769        573        561        843
## CCL2               216        511        384        407        945       1990
## CCL5               149        114        115        259         94        498
## CCL3                19         20         40         56         32        165
## CCR7               223        192        136        112        127        255
## STAT3            13885      16818      19566      14505      11752      28849
## SOST                22         19         41          8         25         93
## ITGA2B              11         39          8         44         27         29
## EFCAB13             56         95         55        127         74         41
## NPEPPS            3259       4843       4033       4662       4831       4797
## TBX21               10         25          9         22          9         34
## COL1A1           13991      30923      29376      37496      82884     198659
## MRPS23             559        726        834        570        681        837
## MIR21                3          4          2          3          3          2
## RPS6KB1           1286       1649       1801       1231       1351       1884
## ACE                264        587        354        385        693       1458
## ERN1              1091       1283       1222       1493       1078       1161
## PRTN3                0          0          0          0          0          0
## PLIN5             1035       1284        891       1128        460       1086
## RETN                 0          3          0          4          0          5
## CCL25                0          1          0          0          0          0
## ICAM1              448        463        443        759        732       1852
## TYK2              2343       3059       2153       3509       2516       1982
## SMARCA4           4045       4920       5475       4726       4158       5482
## ACP5              1849       2936       2206       2577       2316       2053
## JUNB              4628       8623       8045       4928       7682       8732
## CYP4F22           5209       5174       5653       5193       4867       4836
## JAK3               256        358        172        384        241        727
## JUND              4793       3502       4106       2721       3340       4357
## CEBPA            10724       8897      11189       8623       6516       8431
## CEBPG             2409       2597       3025       2150       2450       2828
## ACP7              5277       3884       3677       3347       3589       3897
## ZFP36             2381       3229       2982       2342       3484       4225
## TGFB1             1558       1553       2364       1527       2283       2573
## RPS19            13509      17786      20781      18541      15143      19731
## PSG2                 2          0          8          3          0          2
## FUT2               591        857        821        585        874        945
## NKG7                44         52         60         65         49        197
## ZNF415              41         80        104         95         27         63
## LILRB2              75        121         69         95        188        369
## LILRA5               5         10          1          9         18         82
## KIR3DL1              1          0          1          1          0          2
## KIR3DL2              4          1          0          7          0          3
## SMOX              1777       1078       1108       1099       1153       1262
## BMP2               387        317        442        255        266       1568
## GINS1              215        410        419        357        396        526
## PLCG1             3058       3673       3388       4479       3240       2667
## YWHAB             8853      10979      13841       9193       9604      14458
## PI3              57480      85380      74670      58951      19422     155270
## MMP9               345        332        257        199        472        988
## CD40               285        351        349        451        409        486
## PFDN4              179        316        292        230        243        366
## BMP7              1768       1557       1528       1248        845       1206
## RPS21             5295       7622       7936       6812       6441       8234
## MX1               7539      12256       9962       4929       2172       8424
## PFKL              4567       5670       6095       5808       5361       6156
## IL17RA            1124       1281       1383       1394       1321       1577
## UBE2L3            2123       2491       2937       1933       2164       2931
## TPST2              668       1130       1323       1299       1239        850
## XBP1              3909       4771       5418       3726       4015       4997
## UQCR10             784       1218       1178        819        871       1330
## SEC14L2            644        694        545        922        316        678
## APOL6             2975       4362       4150       3665       3012       3553
## APOL1             2710       3511       5440       1616       1307       4203
## TYMP             18863      25128      21380      15144       8594      21539
## BEND2                0          0          0          0          0          0
## PHEX                26         75         46         54         39         87
## TIMP1              469        828        743        764       1658       2292
## FOXP3              202        169        223        239        130        274
## MSN               7111       9427      12036       7893      10637      15873
## PGK1              8218      11293      13001       8146      10710      17294
## LAMP2             4530       7864       8182       7428       6901       8494
## CD40LG              14         40         19         33         85         34
## IRAK1             4677       5182       6329       3901       4687       6158
## COX2             82308      84352     125211     104007      70696      97337
##             GSM6222630 GSM6222631 GSM6222632 GSM6222633 GSM6222634 GSM6222635
## TNFRSF9              3         17          3         12          5          6
## ENO1             11762      20261      13415      15318       9816      13704
## PIK3CD             346        427        329        349        459        394
## PGD               1635       4114       2327       3923       1888       3012
## MTHFR             1124       1403       1323       1071       1146        755
## TNFRSF1B           532        591        327        412        485        526
## PINK1             1958       2767       2026       3036       2320       3063
## IFNLR1             469        542        582        579        721        407
## RUNX3              743       1060        960        713       1063        921
## SH3BGRL3          3037       5727       2759       3583       2761       3010
## CD52               100        220         44        153        179        107
## IFI6               276        607        345        792        350        355
## ZC3H12A            514       1243        907       1347        787        931
## UTP11              465        634        665        782        698        542
## JUN               2019       2488       1824       1472       1807       2298
## KANK4              135        140         62        110         44         92
## EFCAB7             147        218        208        234        214        183
## IL23R                0          1          1          0          0          1
## ADGRL2            1236       1125        992       1160       1079       1564
## GBP3               363        560        269        567        625        396
## GBP1               460        614        593        520        504        425
## GBP5                38         41         24         45         67         50
## TGFBR3            2731       2344       3204       1590       2298       1940
## VCAM1              136        164         71        120        197        113
## PTPN22              41         49         19         49         49         32
## CD160               24         16         32         17         23         11
## FCGR1A              11          8          7          5          8          6
## MCL1              6721       7583       8874       7435       8461       6320
## CTSK              6899       5230       2953       2200       7605       3183
## RORC               755       1380       1331       1572       1476       1606
## S100A9             607       1818        264        849        505       1092
## S100A12              7          0          2          2         12         19
## S100A8             320       2075        196        832        458        876
## IL6R               643        484        710        613        624        482
## RIT1               501        739        561        818        809        586
## BGLAP               18         27         20         19         34         26
## IFI16             2194       2604       2655       2338       2664       1887
## AIM2                10          6         11          7         20          4
## CRP                  0          0          0          0          0          0
## FCGR2A             145        179         73        123        181        175
## HSPA6              161        223        359        215        197        180
## FCGR3A             157         72        108        238         59        258
## SELL                44         17         13         18         84        153
## GLUL              7140      10298       6076      10523       7753      10579
## PTGS2              127         55        115         78         97         50
## CRB1                 5          3          8          1          4          6
## KDM5B             2531       3338       3812       2940       3157       2529
## IL10                 2          3          2          0          1          0
## YOD1              3216       1790       3385       2465       2799       1139
## HHAT               198        260        299        199        206        297
## TRAF5              347        173        442        341        408        175
## NLRP3               42         24         15         15         55         17
## LINC01250            0          1          1          3          0          2
## RPS7              8609      17449      13335      14019      15454      12876
## RSAD2               51         65         60        123         83         64
## FOSL2             7695       7499      12292       9687       9162       7975
## REL               1559       1247       2514       1384       1820       1160
## TGFA               872       1188       1157       1416       1155        946
## DYSF               370        344        251        166        389        612
## HK2               1358       2413       1178       1740       1062       2978
## CD8A                88        124         35         71        126         36
## CD8B                40         89         23         34         67         46
## EIF5B             4310       7403       5812       4891       5005       5073
## IL1R1             2356       2314       2312       2719       4012       2176
## RGPD6              648        476       1086        698        864        618
## IL1A                31         54         12         38         39         99
## IL1B                14         94          8         51         23        134
## IL37              2096       1349       1385       2013       2256        949
## IL36RN            2482       2121       3265       2345       3171       1025
## IL1F10             173        116        139        128        148         71
## IL1RN             2256       3367       3228       3507       2917       1878
## NMI                316        479        340        380        429        343
## TNFAIP6            112         88         74         24         78         44
## IFIH1              399        458        412        568        516        355
## SCN1A                2          0          2          1         16          4
## ABCB11              35         57         98        131        157         85
## RBM45              193        215        238        209        224        155
## FRZB               705        709        597        364        609       1058
## TFPI               481        358        317        369        801        403
## STAT1             2157       2476       2509       3266       2692       1836
## NABP1              151        193        120        274        413        191
## SF3B1             8011       7267      11311       8287      11273       6741
## CASP10             326        258        408        573        537        293
## CD28                18         34          2         20         40         17
## CTLA4                5          8          5          3         11          3
## ICOS                 4          4          4         10          9          8
## NDUFS1            2975       3152       3418       3798       2924       2970
## CXCR2               91        155         96         80        111        339
## IRS1              1160        508        831        711        596        646
## CCL20                5         13          1         18         17          8
## ATG16L1           1115       1127       1384       1065       1271        803
## GPR35               40         26        109         76         85         72
## PDCD1               13          8          0         11         10          6
## PPARG              348        594         63        657        238        487
## RPL15            22132      34629      27508      27615      30241      25953
## EOMES               11         12          2          5         13          9
## CX3CR1              39         87         29        119         99         92
## ACKR2               40         36         28         34         32         30
## CCR2                36         99         13         44        120         53
## CCRL2               13          7          4         14         12          5
## TLR9                16          9         11         21         28          9
## ARHGEF3            913        787        958        733        921        736
## ADAMTS9            191        181        220        117        328        225
## TMEM45A          16518      28882      19647      27808      19737      23758
## CD80                 2          3          6          2          3          2
## CD86                74        144         59        113        145         57
## MIX23              144        292        288        230        281        187
## PLS1               163        145        262        149        143        147
## PTX3                17         45         27         15         47         36
## GOLIM4            1289        968        966        705       1045       1198
## MYNN               571        572        683        585        735        508
## TNFSF10           1118       1744        825       1284       1556       1392
## ADIPOQ            2594        395        148         39         72        565
## SPON2             1355       1187       1678        802       1224       1050
## S100P               63        339        133         70        152        333
## WDR1              6350       7079       6788       6134       5225       5741
## CD38                16          7          3          8          6          6
## PPARGC1A           465        446        659        452        365        821
## TMPRSS11B            0          0          0          0          0          0
## CSN3                 0          0          0          0          0          0
## ALB                  1          2         12          5          4          6
## CXCL8               14          5          1         12          6          8
## CXCL2               13          9         11          3          8          5
## AREG                88        188         86         85         66        198
## CXCL10              24          6         12          6         21         16
## CXCL13               0          2          1          2          6          0
## SPP1               341         23          1         73          7         45
## HERC6              165        271        263        294        235        251
## NFKB1             1561       1826       2285       1849       2176       1623
## SEC24B            1447       1440       2051       1357       1608       1153
## EGF                 97         53        157         75        171         84
## IL2                  0          0          0          1          0          0
## IL21                 0          0          0          0          0          0
## IL21-AS1             0          0          0          1          0          0
## SLC7A11            126        119        176         91        108         53
## IL15                32         82         41         61         97         33
## EDNRA              448        474        232        280        317        566
## TLR2               171        221         95        226        234        345
## FGB                  0          0          0          0          0          1
## DDX60              987        734       1208       1480       1063        673
## SPCS3             2212       2796       3033       2544       2648       2359
## TLR3               133        139        199        130        189         88
## OSMR              1022        886        900        859       1171        793
## GZMK                21         30          9         11         37          4
## GZMA                20         47          2         10         29         17
## ANKRD55              1          1          0         10         11          5
## CENPK               65        167        142        114        138        107
## CAST             11506       9891      13434      12529      10677       9399
## ERAP1             3111       2484       3817       2338       3227       2045
## ERAP2              822       1111        134        213       1967        103
## TNFAIP8            805       1385       1010       1062       1233        669
## CSF2                 0          0          0          1          1          0
## IL5                  1          3          1          1          1          5
## IL13                 0          2          4          1          3          1
## IL4                  1          2          2          1          2          2
## CD14               503        467        203        258        310        317
## CSNK1A1          10375      11738      13508      10108      11252       8012
## PPARGC1B           437        585        440        663        400        539
## TNIP1             2462       3270       2474       2835       2896       2906
## ATOX1              372        631        435        489        526        358
## FAXDC2            2018       3368       1172       4443       2204       3959
## IL12B                0          3          0          3          1          2
## MIR146A              1          0          1          1          1          0
## PDLIM7             907       1396        945        932        684        928
## SERPINB1           640        883        807        629        792        748
## SSR1              3286       4057       3693       3463       3346       3332
## CD83                94        122        145        122        173         67
## SOX4              1730       2168       1976        977       1555       2116
## CMAHP              835        440       1177        615       1520        548
## HLA-A            12585      26067      20679      17078      19566      17953
## HLA-C            13209      21499      20701      19103      17458      12522
## HLA-B            20874      49978      37526      34645      31099      26231
## MICA               312        359        264        356        386        245
## LTA                 15         13         24          8         13         14
## TNF                 26         45         30         98        119         18
## HLA-DRB1          2219       4187       1992       4568       3879       2432
## HLA-DQB1           380       1055        506       3517       2089       1031
## PPARD             2097       1842       1881       2321       1897       1595
## CCND3              978       1288       1058       1378       1235       1169
## VEGFA              878       1519        726       1552       1661       1659
## RUNX2              120        110        132         82        169         93
## IL17A                0          0          0          0          0          0
## IL17F                0          0          0          0          0          0
## PRDM1             1265       1225       1528       1426       1335       1153
## ATG5               471        779        624        673        663        531
## TRAF3IP2          1294       1511       1689       1365       1278       1114
## NCOA7              654        777        702        618        603        476
## SGK1              1197       2360       1373       1624       1589       2975
## IFNGR1            2011       2874       2282       2271       2845       2279
## TNFAIP3           1202        641       2103       1106       1500        474
## SOD2              4798       5813       4309       4501       4793       4581
## LPAL2                5          4          3          2         12          2
## PLG                  8          4          4          7         11          3
## CCR6               118        115         70        139        187         74
## RAC1              8307      12185      11561       8694       9341       7646
## ZNF316            1950       2128       2320       2142       3170       2262
## AHR               2645       3104       4163       2350       2837       2100
## IL6                  3          3          0          4          2          9
## TOMM7             2060       5023       3257       3747       4022       4065
## CYCS              2044       3275       2255       2822       2336       2521
## AQP1              7066       7897       8550       6047       6638       8942
## NT5C3A            1064       1503        988       1200       1310        944
## EGFR             11049       6420      14845       9228      10970       8883
## CD36              3693       1766        602       1998       1285       4896
## SAMD9              223        158        143        351        263        264
## SERPINE1           305         60        125        167        189         71
## CUX1              2506       2157       2663       2297       2457       1681
## PSMC2             2155       3391       2949       2778       2584       2340
## NAMPT             1635       2633       2145       1929       1874       2088
## HYAL4               11         24         16         24         17         31
## LEP               1967         32         30         20         55        320
## CALD1             6591       7502       6272       4918       4759       5049
## BRAF              1325        910       1627       1279       1548        989
## EZH2               630        659        700        904       1102        408
## DNAJB6            4156       4317       4403       5132       4001       3678
## CSMD1               99         47         52         98         20         12
## CTSB             10424      12677       8841       9260       9037      15460
## EGR3              3547       3133       3275       3784       3600       2905
## TNFRSF10A          231        140        249        265        235        154
## BNIP3L            4457       7196       5066       7302       4951       6042
## DUSP4              703        930        397       1471        384       1017
## NRG1               609        275        727        564        390        443
## RPL7             21153      37494      28511      29166      27534      27016
## IL7                171        203        281        202        329        157
## GEM                400        258        180        234        332        322
## MYC               1497       1926       2170       1873       1204       1321
## GPT                853       2868        805       3630       1297       1829
## JAK2               952        728       1433        878       1233        684
## CD274               24         25         10         19         28         36
## IL33               706       1106        909       1073       2222       1268
## LURAP1L-AS1          0          5         10          6          1          0
## LURAP1L            330        529        330        334        470        418
## TTC39B            1933       3536       1668       3277       2278       2443
## IFNA1                0          0          0          0          0          0
## TEK                260        140        202        180        269        284
## TRBV20OR9-2          0          3          1          0          0          0
## TOMM5              489       1007        663        739        697        695
## ANXA1             9010       7140       7436       7502       6992       6793
## ERP44             1162       1463       1524       1394       1411       1209
## ZNF483             242        226        260        146        210        241
## TNFSF15             16         28         10         41         57         34
## TLR4               550        164        221        161        300        165
## PTGS1             7176       7913      12507       8130      10966       4866
## HSPA5             6792      10163       9492      10509       6445       8731
## FNBP1             2472       2006       2942       2185       2671       1831
## CARD9               59        216         90        219        242        136
## TRAF2              330        517        291        547        517        392
## CLIC3             1330       1781       1635       1338       2065       1191
## IL2RA                5         16          0         16         11          9
## GATA3             6087       7146      12256       6969       9929       4166
## VIM              28366      26066      20460      15420      21700      21404
## CREM               214        341        278        261        246        306
## DKK1                50         19         12          6         46         29
## MBL2                 0          0          0          0          0          0
## SAR1A             2160       2878       2816       2647       2519       2187
## PRF1                36         54          7         37         42         62
## ZMIZ1             4281       2514       4675       3063       3340       3164
## IFIT3              268        375        274        240        331        282
## TALDO1            2578       5229       2807       5300       3001       3803
## IGF2               426        435        445        388        326        604
## INS-IGF2           392        402        417        369        298        558
## IGF2-AS              0          1          6          1          2          0
## INS                  0          0          0          0          0          0
## STIM1             2961       2991       4302       2847       3312       2883
## TRIM22             475        675        507        864        992        488
## PTH                  0          0          0          0          0          0
## SAA1               308        559         33        739        223        371
## SLC1A2             105         65         92        106         74         78
## FOSL1              141         52         86         58         24         20
## CCND1            11068       8542      14699      11294      14292       9331
## JRKL               260        364        211        340        433        208
## MMP7               249        248        369        213        744        303
## MMP1                 3         13         12          0          4          7
## MMP3                 4          6         26          2          8          0
## CASP1              705       1054        935        878       1099       1051
## DLAT               959       1017       1162       1044        873        797
## IL18              2476       3997       2858       2887       3451       2434
## CD3E               110        116         24         86        133         60
## MCAM              3093       3596       2302       2642       1320       2999
## WNK1             10162       5564      14185       9769       9489       7166
## TNFRSF1A          3265       4944       3873       3697       3518       2855
## GAPDH            15763      36535      25066      22449      19800      24867
## CD4                563        639        269        367        604        372
## SLC2A3             207        289        195        244        211        304
## CLEC4D               1          0          0          1          0          1
## KLRB1               16         26          4          9         15         14
## CD69                17         38         12         26         49         11
## CLEC2B             695       1502       1698       1199       1546        854
## OLR1                 0          2          1          2          2          2
## ABCD2              107          9          9          7         20         14
## LRRK2              489        307        595        328        487        437
## VDR               1817       2566       1815       2411       1921       2259
## TMBIM6           14568      21004      13778      24202      14601      17724
## PFDN5             2802       5099       3473       4206       4267       3939
## SP7                  1          3          0          1          0          0
## MUCL1             1647       4229       2999       1612       1525       5480
## CD63              6722      10956       5945       6053       6961       7490
## SUOX              3126       4079       3816       4069       3496       2972
## RPS26             1394       4768       1922       1021       1894       1850
## RPL41             6213      13884       8456      10131       9608       9655
## IL23A                6         19         20          9          6         12
## DDIT3              221        394        244        355        382        305
## IFNG                 1          1          0          1          0          0
## IL22                 0          0          0          0          1          0
## LYZ                769       1136        167        815       1574        473
## CAPS2               78         77         94         57        103         90
## SYT1                29         16         19         26         26         39
## BTG1              4988      11168      10120       6672       9743       7812
## HCAR3              351        480        359        460        628        137
## GJB2              2654       5518       1373       2192       1745       4174
## GJB6              1554       2651       1482       1560       1306       2085
## TNFSF11              7         43          7         20         16         19
## TPT1             43043      74332      58059      62731      65858      63349
## LMO7              2140       1701       2492       2404       2442       1394
## TNFSF13B            67        118         30         78        145         77
## LAMP1             9444      11222      10416       9920       9086       9081
## PSME2              835       1481        981       1340       1303       1030
## GZMB                13          9          3          3          9         16
## RPL36AL           2169       4541       2505       3484       2835       3489
## PYGL              1750       1867       1684       1484       1856       1250
## HIF1A             1875       1917       2816       1714       1853       2175
## FOS               3176       1249       2428       2538       3933        862
## JDP2              1106       1144       1616        845       1167        999
## NOXRED1             53         49         42         66         73         43
## SERPINA1            44        170          5         32         78        314
## CDC42BPB          4225       2424       4962       3354       4191       3067
## PLA2G4D            794        734        665        986       1156       1612
## TRIM69               9         17         12         24         28          9
## SLC51B              30         38         25         18          7         32
## SMAD3             1503       1310       1862       1757       1740       1310
## CYP1A1               4        115         44        183         95          3
## AKAP13            4934       3148       5814       3503       4221       3615
## MEFV                 8          4          4         11          9         50
## SOCS1               90        156         56        107        137        101
## ATXN2L            2271       1981       2810       2632       3639       1791
## CD19                 1          3         10          5          4          2
## ITGAL              103         95         27         93        169         95
## ITGAM              192        176         60         73        178        109
## ITGAX              109        109         26         99        201        176
## DNAJA2            1963       2607       2655       2384       2563       2042
## SIAH1              860       1304       1546       1194       1131       1063
## ADCY7             1266       1154       1466       1369       1663        816
## NOD2               628        737        917        921        992        464
## CMTM2                3          5          1          6          3         15
## SF3B3             3241       3830       3563       3944       3970       2925
## PSMD7             2080       3724       3312       3162       2446       2743
## WWOX               242        236        270        266        188        156
## MAF               6577       9835      10604       7514       9242       6259
## SLC7A5            1199       2121       2216       2380        980       1543
## CXCL16             852       1301        909       1102        915        956
## XAF1               411        351        308       1186        983        289
## CD68               739        477        224        355        472        305
## PER1              5214       5862       9903       4639       5911       7098
## NOS2                 9          7          3          2          3         13
## TRAF4              474        714        787        827        664        614
## CCL2               104        362        105        188        120        162
## CCL5               126        112         17         50         75         46
## CCL3                 9          2          0          2          8          0
## CCR7                28         25          9         33         48         33
## STAT3             6360       8240       6590       7011       6791       6613
## SOST                 0          2          0          0          0          7
## ITGA2B              20         20         16         31         41         32
## EFCAB13            129        105         86        154        193         69
## NPEPPS            2839       2956       3552       3407       4068       2336
## TBX21               15          2          1          6          4          9
## COL1A1           19917     162326       9996      16923      25004      26661
## MRPS23             477        815        690        682        596        559
## MIR21                1          2          1          1          1          3
## RPS6KB1           1028       1026       1453       1107       1251        880
## ACE                560        366        398        280        514        461
## ERN1               952        809       1025        989       1274        856
## PRTN3                1          1          0          1          1          0
## PLIN5              615       3658        113       5683        963       3251
## RETN                 3          2          0          0          4          0
## CCL25                2          0          0          0          0          2
## ICAM1              394        296        338        248        356        294
## TYK2              1808       1854       2541       2304       2990       1742
## SMARCA4           3267       3694       4356       3838       3604       3141
## ACP5               771       1538        629       1685        947       1305
## JUNB              3145       5139       3482       4058       4337       3645
## CYP4F22           2554       4202       2772       3613       2316       2465
## JAK3               107         99         74         98        165        127
## JUND              6747       7217       8089       4788       5302       6037
## CEBPA             6179      11674      13441       9268       7868       7214
## CEBPG             2023       2153       3046       2657       2998       2082
## ACP7               468        624        277        623        649        629
## ZFP36             1997       1909       2082       1801       2279       1506
## TGFB1             1302       1245       1233       1204       1073        973
## RPS19            13733      28522      21456      20894      20443      18581
## PSG2                 1          2          3          4          7          0
## FUT2                74        169        108        123        167         87
## NKG7                22         29          4         15         36         29
## ZNF415              96        135         86        283        198        206
## LILRB2              39         36          6         23         47         54
## LILRA5               4          0          0          1          4         10
## KIR3DL1              0          0          0          1          1          0
## KIR3DL2              0          0          0          0          1          1
## SMOX               488        708        383        484        357        592
## BMP2              1225        509       2343        601        852        371
## GINS1              125        302        163        227        143        160
## PLCG1             2423       1966       2821       2847       3077       2263
## YWHAB             8129      12701      10124       9645       8576       8834
## PI3                105        273         89        116        165        114
## MMP9               348         99         38        121        139         73
## CD40               296        404        406        458        586        219
## PFDN4              215        402        273        278        317        256
## BMP7              2028       1934       2461       1728       1540       1161
## RPS21             4525      11263       8063       7668       8678       8817
## MX1                396        360        366        667        348        307
## PFKL              5251       7211       5398       6916       6265       5573
## IL17RA            1163        972       1191       1302       1077       1069
## UBE2L3            1623       2717       1741       2234       1906       1849
## TPST2              633        897        744        812       1041        838
## XBP1              2934       5311       3473       4887       3069       3900
## UQCR10             544       1418        655       1159        802       1044
## SEC14L2            353        763        369        465        208        828
## APOL6              683        724        408        609        609        562
## APOL1              242        247        207        192        302        235
## TYMP               778       2415        432       2030       1240       1491
## BEND2                0          0          0          0          0          1
## PHEX                12         18         16         15         22         17
## TIMP1              853       1309        574        552       1241        597
## FOXP3               51         75         68        109         78         66
## MSN               7847       6392       7892       5649       6914       5681
## PGK1              5948       9950       7683       7460       6137       6637
## LAMP2             5778       7769       7263       6398       6481       5891
## CD40LG              18         28          3         26         40         19
## IRAK1             2386       3442       2769       2889       2395       2483
## COX2             71940     116849     125419     119408      73020     114342
##             GSM6222636 GSM6222637 GSM6222638
## TNFRSF9              7         15          7
## ENO1             14933      15446      13633
## PIK3CD             417        401        486
## PGD               2636       1943       3215
## MTHFR             1416       1154       1181
## TNFRSF1B           644        908        791
## PINK1             2680       2345       2103
## IFNLR1             550        611        509
## RUNX3             1444       1067        827
## SH3BGRL3          3050       3758       2907
## CD52                94        107        186
## IFI6               567        688        636
## ZC3H12A            921        908        711
## UTP11              742        706        598
## JUN               1440       2701       1523
## KANK4              101        181        162
## EFCAB7             206        199        170
## IL23R                0          0          0
## ADGRL2            1124       1164       1265
## GBP3               527        342        289
## GBP1               550        647        488
## GBP5                52         78        125
## TGFBR3            2830       4868       2779
## VCAM1              199        279        174
## PTPN22              27         45         52
## CD160               29         19         11
## FCGR1A               9         12         11
## MCL1              6318       8940       5943
## CTSK              6396      10133       5952
## RORC              1867        914       1528
## S100A9             239       1255       1086
## S100A12              3          5         32
## S100A8             154       1262        880
## IL6R               847        966        759
## RIT1               812        797        552
## BGLAP               32         26         15
## IFI16             2367       2368       2387
## AIM2                13         15         23
## CRP                  1          0          0
## FCGR2A             128        173        238
## HSPA6              167        211        165
## FCGR3A              53         78        130
## SELL                60         51        162
## GLUL              8676       8071       7756
## PTGS2               85        187         77
## CRB1                 7          5          9
## KDM5B             3123       2615       2454
## IL10                 3          6          0
## YOD1              2157       2502       1810
## HHAT               212        202        188
## TRAF5              488        510        343
## NLRP3               21         51         46
## LINC01250            4          1          0
## RPS7             14108      12839      12844
## RSAD2               94        121        119
## FOSL2             8460       8834       7166
## REL               1410       1843       1498
## TGFA               887       1061       1019
## DYSF               395        527        368
## HK2               1588       2040       2287
## CD8A                92         81         78
## CD8B                30         43         35
## EIF5B             6452       5640       4745
## IL1R1             3243       2840       2944
## RGPD6              824       1158        641
## IL1A                55         46         81
## IL1B                44          4         68
## IL37              1865       1372       1476
## IL36RN            2469       1973       1423
## IL1F10             131        220        115
## IL1RN             1959       1895       1860
## NMI                392        356        368
## TNFAIP6             81        312         48
## IFIH1              511        224        433
## SCN1A               16          7         11
## ABCB11             113         44         92
## RBM45              244        182        181
## FRZB              1387        829        771
## TFPI               582        691        439
## STAT1             2536       2663       2753
## NABP1              251        238        229
## SF3B1            10577      11071       8462
## CASP10             422        542        332
## CD28                19         16         50
## CTLA4                1         13         26
## ICOS                 2          5         22
## NDUFS1            3449       2415       3399
## CXCR2              144         69        218
## IRS1               725       1290        757
## CCL20               40          5          1
## ATG16L1           1259       1232        997
## GPR35               73         81         79
## PDCD1               14          8         12
## PPARG              439        532        515
## RPL15            29355      23562      26452
## EOMES               12         13         15
## CX3CR1             131         50        142
## ACKR2               37         62         42
## CCR2                76         52        106
## CCRL2               19         20          4
## TLR9                15          9         11
## ARHGEF3            885        872        757
## ADAMTS9            287        349        206
## TMEM45A          14857      16142      15786
## CD80                 1          1          2
## CD86                85         96        100
## MIX23              250        176        185
## PLS1               142        136        179
## PTX3               109         51         37
## GOLIM4            1446       1733        979
## MYNN               745        691        568
## TNFSF10           1494       1335       1330
## ADIPOQ             823       4751       1189
## SPON2             1766       1572       1022
## S100P              216        183        177
## WDR1              7973       7484       5620
## CD38                 9         39          6
## PPARGC1A           732        410        737
## TMPRSS11B            0          0          0
## CSN3                 0          0          0
## ALB                 13         15          8
## CXCL8                5          5          3
## CXCL2                7         30          4
## AREG                49        121        109
## CXCL10              11         44         17
## CXCL13               0          0          0
## SPP1                 9          9          4
## HERC6              291        221        250
## NFKB1             2291       1823       1603
## SEC24B            1676       1903       1410
## EGF                133         86         70
## IL2                  0          0          0
## IL21                 0          0          0
## IL21-AS1             1          0          0
## SLC7A11            100        225        132
## IL15                76         94         62
## EDNRA              503        395        467
## TLR2               182        177        235
## FGB                  0          0          1
## DDX60             1267        822        873
## SPCS3             2871       3047       2295
## TLR3               150        158        190
## OSMR              1062       1438       1053
## GZMK                18         19         32
## GZMA                36         33         33
## ANKRD55              3          1          3
## CENPK              120         79        102
## CAST             11088      12048       9743
## ERAP1             3353       3048       2619
## ERAP2              151       1244        181
## TNFAIP8           1062        774        927
## CSF2                 0          0          0
## IL5                  5          4          2
## IL13                 2          6          0
## IL4                  2          3          2
## CD14               351       1006        645
## CSNK1A1          10533      10310       8067
## PPARGC1B           604        360        499
## TNIP1             3022       3142       2759
## ATOX1              491        510        452
## FAXDC2            2471       1149       2954
## IL12B                1          1          1
## MIR146A              0          0          1
## PDLIM7            1700       1624        977
## SERPINB1           910       1104        840
## SSR1              3831       3454       3260
## CD83               161        129        106
## SOX4              1959       1929       1535
## CMAHP              960        681        486
## HLA-A            21808      23513      23468
## HLA-C            28420      22228      19459
## HLA-B            40077      32914      31482
## MICA               358        392        194
## LTA                 14         26         16
## TNF                 60         46         56
## HLA-DRB1          3520       5345       5198
## HLA-DQB1           489       2244       2291
## PPARD             1736       1895       1660
## CCND3             1327       1248       1069
## VEGFA              784       1059       1460
## RUNX2              169        165        135
## IL17A                0          0          0
## IL17F                0          0          0
## PRDM1             1179       1258       1278
## ATG5               764        679        641
## TRAF3IP2          1413       1305       1190
## NCOA7              729        915        751
## SGK1              2192       1851       1308
## IFNGR1            2561       2739       2508
## TNFAIP3            828       1115        731
## SOD2              5835       5471       5096
## LPAL2                8         11         11
## PLG                  2          3          8
## CCR6               132         89        129
## RAC1              9716       9490       7582
## ZNF316            2699       2863       1976
## AHR               2701       3437       2367
## IL6                  1         17          4
## TOMM7             3954       3822       4152
## CYCS              2703       2137       2335
## AQP1             12863      14865       8721
## NT5C3A            1002        982       1096
## EGFR             12151      10421       9519
## CD36              3694       7114       4080
## SAMD9              243        124        280
## SERPINE1           146        322         95
## CUX1              2446       2237       2065
## PSMC2             2836       2992       2346
## NAMPT             2084       2025       2241
## HYAL4               16         19         35
## LEP                488       2418        713
## CALD1            10898       6721       6064
## BRAF              1450       1518       1107
## EZH2               788        667        588
## DNAJB6            4386       3908       3403
## CSMD1              123         49         35
## CTSB              8857      12456       9401
## EGR3              2888       2578       2795
## TNFRSF10A          243        298        175
## BNIP3L            5031       5691       6257
## DUSP4              667        504       1019
## NRG1               264        739        229
## RPL7             29231      22518      25762
## IL7                230        161        138
## GEM                350        437        247
## MYC               1563       1684       1069
## GPT               1517        408       1370
## JAK2              1255       1124       1022
## CD274               31         41         25
## IL33              1175       1038       1294
## LURAP1L-AS1          4          9          9
## LURAP1L            460        513        456
## TTC39B            1827       1278       2748
## IFNA1                0          0          0
## TEK                435        386        331
## TRBV20OR9-2          1          0          0
## TOMM5              726        751        660
## ANXA1             7942      12059       6413
## ERP44             1379       1555       1142
## ZNF483             257        281        224
## TNFSF15             42         12         41
## TLR4               373        601        292
## PTGS1             9932       8850       5873
## HSPA5             8933       7674       7536
## FNBP1             2801       2451       2179
## CARD9              198         93        149
## TRAF2              470        311        386
## CLIC3             1613       1196       1005
## IL2RA                7         27         22
## GATA3             8736       6057       6211
## VIM              27502      42810      25233
## CREM               313        401        266
## DKK1                21         49          9
## MBL2                 0          2          0
## SAR1A             3136       3417       2508
## PRF1                25         31         66
## ZMIZ1             3877       4280       3634
## IFIT3              344        552        513
## TALDO1            3168       2560       3663
## IGF2               565        841        585
## INS-IGF2           524        753        542
## IGF2-AS              4          3          8
## INS                  0          0          0
## STIM1             3859       3526       2702
## TRIM22             680        833        800
## PTH                  0          0          0
## SAA1               496       3901        452
## SLC1A2             183         73        114
## FOSL1               32        152         18
## CCND1            13015       8574       7425
## JRKL               323        204        275
## MMP7               236        333        307
## MMP1                 3         14          7
## MMP3                 3         23          0
## CASP1              896        777        834
## DLAT              1125       1075       1096
## IL18              2598       2123       2290
## CD3E                68         51        141
## MCAM              4591       6352       3534
## WNK1              9487      11192       9041
## TNFRSF1A          3872       3541       3353
## GAPDH            24329      25029      23798
## CD4                562        719        701
## SLC2A3             265        520        305
## CLEC4D               1          0         13
## KLRB1               15          8         16
## CD69                 9         28         32
## CLEC2B            1254       1434       1138
## OLR1                 0          0          1
## ABCD2               48        220         80
## LRRK2              507        645        451
## VDR               2289       2137       2333
## TMBIM6           18021      13187      18023
## PFDN5             4280       3696       3788
## SP7                  5          3          1
## MUCL1             5705       1838       2708
## CD63              9843      12867       8238
## SUOX              3588       2847       2919
## RPS26             2035       1539       1813
## RPL41             9274       7100       7963
## IL23A               15         17          6
## DDIT3              400        351        300
## IFNG                 0          1          1
## IL22                 0          0          0
## LYZ                434        546       1218
## CAPS2              107         77         95
## SYT1                46         67         39
## BTG1              8378       6164       7999
## HCAR3              263        247        196
## GJB2              2143       3681       2697
## GJB6              1895       2993       1775
## TNFSF11              5          5          6
## TPT1             67536      61483      60565
## LMO7              2337       2045       1521
## TNFSF13B            78        123        147
## LAMP1            11831      10477       9299
## PSME2             1262       1151       1073
## GZMB                13          7         15
## RPL36AL           3449       2631       3146
## PYGL              1681       2133       1957
## HIF1A             2676       2678       2390
## FOS               1800       2712       1399
## JDP2              1296       1707        889
## NOXRED1             50         33         31
## SERPINA1            48        164        186
## CDC42BPB          4696       4646       3405
## PLA2G4D            575        489        637
## TRIM69              35         19         10
## SLC51B              43         28         24
## SMAD3             1825       1685       1640
## CYP1A1              15         52        146
## AKAP13            4062       6188       3454
## MEFV                14         17         44
## SOCS1              106        141         95
## ATXN2L            2821       2174       1821
## CD19                13         15          6
## ITGAL              100         59        195
## ITGAM              128        245        243
## ITGAX              109         77        218
## DNAJA2            2583       2605       2073
## SIAH1             1196       1237       1029
## ADCY7             1408       1368       1253
## NOD2               583        748        500
## CMTM2                9          4         16
## SF3B3             3699       2509       3359
## PSMD7             2797       2979       2582
## WWOX               294        338        157
## MAF               9153       7892       6820
## SLC7A5            2340       1019       2806
## CXCL16            1111        983        768
## XAF1               554        401        538
## CD68               413       1061        654
## PER1              2423       7782       2013
## NOS2                64         16         27
## TRAF4              874        636        628
## CCL2               287        152        131
## CCL5                61         85        155
## CCL3                 2          5          0
## CCR7                22         53         31
## STAT3             7536       6665       7088
## SOST                 9          0          5
## ITGA2B              30         20         41
## EFCAB13            120        102         88
## NPEPPS            3548       3320       2865
## TBX21                3          7          7
## COL1A1           57202     207331      36821
## MRPS23             774        745        585
## MIR21                0          0          1
## RPS6KB1           1411       1498       1104
## ACE                556       1199        693
## ERN1              1036       1134        973
## PRTN3                0          2          0
## PLIN5             1092        174       1834
## RETN                 4          0          6
## CCL25                0          0          0
## ICAM1              757        796        451
## TYK2              2602       2660       1849
## SMARCA4           4217       3450       3216
## ACP5               704        749        907
## JUNB              3548       5264       3565
## CYP4F22           2721       2257       2376
## JAK3               117        112        158
## JUND              5000      10082       4473
## CEBPA             7421      10130       7565
## CEBPG             2514       2546       2076
## ACP7               359        344        495
## ZFP36             1480       2848       1488
## TGFB1             1258       1686       1184
## RPS19            21909      18619      17280
## PSG2                 0          3          3
## FUT2               128         99        103
## NKG7                25         21         47
## ZNF415             229         66        212
## LILRB2              27         91        105
## LILRA5               7          5          4
## KIR3DL1              0          0          0
## KIR3DL2              0          0          0
## SMOX               583        388        583
## BMP2               698        936        621
## GINS1              212        126        152
## PLCG1             2947       3042       2365
## YWHAB            10719       9419       8505
## PI3                130        124        135
## MMP9                51        104        109
## CD40               383        344        294
## PFDN4              297        371        267
## BMP7              1831       1923       1359
## RPS21             8643       7188       7859
## MX1                376        682        613
## PFKL              6077       5365       5702
## IL17RA            1352       1159       1272
## UBE2L3            1900       1806       1816
## TPST2              722        966        663
## XBP1              4711       3080       3835
## UQCR10             960        742        982
## SEC14L2            340        599        367
## APOL6              771       1166        800
## APOL1              418        382        359
## TYMP               942       1324       1398
## BEND2                0          0          4
## PHEX                23         26         16
## TIMP1             1040       2214        997
## FOXP3               70         56         84
## MSN               7737       8447       6742
## PGK1              7086       7425       6810
## LAMP2             6949       5656       6274
## CD40LG               6         10         26
## IRAK1             3177       3623       2457
## COX2            118935      79507      86169

4. Selection of samples

Selection of samples related to psoriatic arthritis from the dataset

# Sample selection: one character per sample column, in column order.
# After sorting, codes "0", "1", "2" become control, lesion PsA and
# non lesion PsA respectively; "X" would mark a sample to exclude.
gsms <- "000000000111111111222222222"
sml <- unlist(strsplit(gsms, split = ""))

# Keep only the samples that are not flagged with "X"
sel <- which(sml != "X")
sml <- sml[sel]
expr_data <- expr_data_filtered[, sel]

# Group membership: relabel the factor codes with syntactically valid names
groups <- make.names(c("control", "lesion PsA", "non lesion PsA"))
gs <- factor(sml, labels = groups)
sample_info <- data.frame(Group = gs, row.names = colnames(expr_data))

# NOTE: no low-count pre-filtering is performed in this step; the DESeq2
# dataset is built directly from the selected count matrix.

dsa <- DESeqDataSetFromMatrix(
  countData = expr_data,
  colData = sample_info,
  design = ~Group
)
# Likelihood-ratio test against the intercept-only model: ranks genes by any
# difference among the groups rather than by one specific contrast.
dsb <- DESeq(dsa, test = "LRT", reduced = ~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 3 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Results table of the LRT fit (alpha = 0.05, "fdr" p-value adjustment)
r <- results(dsb, alpha = 0.05, pAdjustMethod = "fdr")

plotDispEsts(dsb, main = "GSE205748 Dispersion Estimates")

# Histogram of adjusted p-values in 20 equal-width bins on [0, 1]
padj_breaks <- seq(0, 1, length.out = 21)
hist(r$padj, breaks = padj_breaks, col = "grey", border = "white",
     xlab = "", ylab = "", main = "GSE205748 Frequencies of padj-values")

# Pairwise contrasts between the groups, each written as
# c(factor, numerator level, denominator level)
cts <- list(
  c("Group", groups[2], groups[1]),
  c("Group", groups[3], groups[1]),
  c("Group", groups[3], groups[2])
)

# Wald test to obtain contrast-specific results
dsc <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Results for the first contrast (groups[2] vs groups[1])
first_contrast <- cts[[1]]
r <- results(dsc, contrast = first_contrast, alpha = 0.05, pAdjustMethod = "fdr")

# Venn diagram
library(gplots)
# One DEG set per contrast, named "<factor>_<numerator>_<denominator>"
all_res <- vector("list", length(cts))
names(all_res) <- vapply(cts, paste, character(1), collapse = "_")

for (i in seq_along(cts)) {
  # `r` is assigned at top level on purpose: code after the loop reads it,
  # so it must end up holding the results of the last contrast.
  r <- results(dsc, contrast = cts[[i]], alpha = 0.05, pAdjustMethod = "fdr")
  is_deg <- !is.na(r$padj) & r$padj < 0.05 & abs(r$log2FoldChange) >= 1
  all_res[[i]] <- rownames(r)[is_deg]
}
venn(all_res)

5. Differential gene expression analysis

# Differential gene expression analysis: results table without NA rows
df <- na.omit(as.data.frame(r))

# Counts for every gene that survived the NA filter
topGenes <- rownames(df)
heatmapData <- expr_data[topGenes, ]

# Load the necessary libraries
library(pheatmap)
library(RColorBrewer)

# Mean count per gene across all samples, highest first
media_dos_genes <- rowMeans(heatmapData)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)

# The 20 genes with the highest mean count
top_20_genes_por_media <- head(names(media_ordenada), 20)
heatmapData_top20 <- heatmapData[top_20_genes_por_media, ]

# Column annotation: one row per sample carrying its group
annotation_data <- data.frame(
  group = sample_info$Group,
  row.names = rownames(sample_info)
)

# Sort the samples by group (factor level order) and apply the same
# permutation to the matrix columns and to the annotation rows
ordem_grupo <- order(annotation_data$group)
annotation_data <- annotation_data[ordem_grupo, , drop = FALSE]
heatmapData_top20 <- heatmapData_top20[, ordem_grupo]

# Install if necessary
# install.packages("ComplexHeatmap")
library(ComplexHeatmap)
library(circlize)

# Matrix of the 20 genes
mat <- heatmapData_top20

# Row-wise z-score: scale() standardises columns, hence the double transpose
mat_scaled <- t(scale(t(mat)))

# Colour key for the group annotation bar
group_colours <- c(
  "lesion.PsA" = "#d62728",
  "non.lesion.PsA" = "#1f77b4",
  "control" = "green"
)
ha_col <- HeatmapAnnotation(
  Group = annotation_data$group,
  col = list(Group = group_colours)
)

# Reversed RdBu palette anchored at z = -2, 0 and 2
rdbu_reversed <- rev(RColorBrewer::brewer.pal(3, "RdBu"))
cores <- colorRamp2(c(-2, 0, 2), rdbu_reversed)

# Heatmap with the columns split by group and clustering enabled
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  column_split = annotation_data$group, # one column slice per group
  cluster_columns = TRUE,               # column clustering switched on
  cluster_column_slices = TRUE,         # slice-level clustering switched on
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed genes (average) from GSE205748",
  heatmap_legend_param = list(title = "Z-score")
)

# UMAP projection of the samples; genes whose counts sum to zero are removed
has_counts <- rowSums(expr_data) > 0
expr_data_umap <- expr_data[has_counts, ]
u <- umap(t(expr_data_umap), n_neighbors = 15, random_state = 123)

# Scatter plot of the embedding, with each point labelled by its sample name
plot(u$layout, main = "GSE205748 UMAP", xlab = "", ylab = "",
     tcl = 0.1, pch = 19, col = "blue")
text(u$layout, labels = colnames(expr_data_umap), cex = 0.7, pos = 3)

# Gene symbols are taken from the row names of `df`
gene_symbols <- rownames(df)

# Map the gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a regular column so they can serve as the merge key
df$Symbol <- rownames(df)

# Left join: symbols without an Entrez ID are kept with ENTREZID = NA
dfd <- merge(df, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)

# Keep exactly one row per gene symbol. De-duplicating on ENTREZID instead
# would treat every NA as the same ID and silently drop all but the first
# symbol that could not be mapped.
dfd <- dfd[!duplicated(dfd$Symbol), ]

6. Comparison between non-lesional (uninvolved) and lesional skin samples from PsA patients.

# Wald test to obtain contrast-specific results
dsd <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Contrast: groups[3] (numerator) versus groups[2] (denominator)
contrast_3_vs_2 <- c("Group", groups[3], groups[2])
r <- results(dsd, contrast = contrast_3_vs_2, alpha = 0.05, pAdjustMethod = "fdr")


# Significant genes only: padj < 0.05 and |log2FC| >= 1
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)


# Volcano plot of the significant genes (groups[3] vs groups[2])
old.pal <- palette(c("#00BFFF", "#FF3030")) # colour 1 = down, colour 2 = up
old.par <- par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(log2FoldChange, -log10(padj), main = paste(groups[3], "vs", groups[2]),
       xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5)
  # Labels must come from the same object as the coordinates; taking them
  # from the full results table would misalign and recycle the gene names.
  text(log2FoldChange, -log10(padj), labels = rownames(sig_genes), cex = 0.6, pos = 4)
  # Overlay coloured points: sign(log2FC) of -1 / +1 maps to palette index 1 / 2
  points(log2FoldChange, -log10(padj), pch = 20,
         col = (sign(log2FoldChange) + 3) / 2, cex = 1)
})
legend("bottomleft", title = paste0("Padj<", 0.05), legend = c("down", "up"),
       pch = 20, col = 1:2)

# MD plot (mean of normalized counts vs log2 fold change), significant genes only
with(sig_genes, {
  plot(log10(baseMean), log2FoldChange,
       main = paste(groups[3], "vs", groups[2]),
       xlab = "log10(mean of normalized counts)", ylab = "log2FoldChange",
       pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1)
  text(log10(baseMean), log2FoldChange, labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
legend("bottomleft", title = paste0("Padj<", 0.05), legend = c("down", "up"),
       pch = 20, col = 1:2)
abline(h = 0)

# Restore the graphics state that was changed above
palette(old.pal)
par(old.par)

# Add the gene symbols to the points on the graph.

library(ggplot2)
library(ggrepel)

#' Volcano plot of differential-expression results with labelled DEGs
#'
#' Points are classified as "Up", "Down" or "NS" from the adjusted p-value and
#' log2 fold-change cutoffs; only "Up"/"Down" genes receive a text label.
#'
#' @param res Results table with `log2FoldChange` and `padj` columns and gene
#'   identifiers as row names; anything accepted by `as.data.frame()`.
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (default 0.05).
#' @param lfc_cutoff Absolute log2 fold-change threshold (default 1).
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot",
                        padj_cutoff = 0.05, lfc_cutoff = 1) {
  # Work on a plain data frame so the column assignments and ggplot() behave
  # the same whatever class the results table has.
  res <- as.data.frame(res)

  lfc <- res$log2FoldChange
  # Rows with a missing padj or fold change can never be significant
  significant <- !is.na(res$padj) & !is.na(lfc) & res$padj < padj_cutoff

  res$group <- rep("NS", nrow(res))
  res$group[significant & lfc > lfc_cutoff] <- "Up"
  res$group[significant & lfc < -lfc_cutoff] <- "Down"

  # Only the up/down genes get a label; NA_character_ keeps the column
  # character-typed even when nothing is significant.
  res$label <- ifelse(res$group != "NS", rownames(res), NA_character_)

  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")) +
    # na.rm = TRUE: unlabelled (NA) rows are dropped silently, not with a warning
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf, na.rm = TRUE) +
    theme_minimal() +
    labs(title = title, x = "log2 Fold Change", y = "-log10 adjusted p-value", color = "Regulation")
}

# Example of function usage
plotVolcano(r, paste(groups[3], "vs", groups[2]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 265 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Upregulated genes (padj < 0.05 and log2FC >= 1)
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))

# Downregulated genes (padj < 0.05 and log2FC <= -1)
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))

# All DEGs with |log2FC| >= 1
de_genes <- c(up_genes, down_genes)

# Counts of the DEGs; drop = FALSE keeps the matrix shape (genes x samples)
# even when only a single gene is selected
expr_datasa <- as.data.frame(expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE])

# Convert expression data to long format for ggplot2 and attach the
# phenotype columns by sample accession
expr_datas <- expr_datasa
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- pivot_longer(expr_datas, cols = -Gene, names_to = "Sample", values_to = "Expression")
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")

# Results ordered by adjusted p-value (at most the first 429 rows), joined to
# the DEG counts by row name. head() avoids NA subscripts when the results
# table has fewer than 429 rows.
expresse <- r[head(order(r$padj), 429), ]
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)


library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)

r_df <- as.data.frame(r)

# Move the gene names out of the row names so they survive the dplyr verbs
r_df_com_genes <- rownames_to_column(r_df, var = "Gene")

# The 30 significant genes with the largest absolute fold change.
# dplyr:: prefixes guard against same-named functions from other packages.
top30_fc <- r_df_com_genes %>%
  dplyr::filter(padj < 0.05, abs(log2FoldChange) > 1) %>%
  dplyr::arrange(dplyr::desc(abs(log2FoldChange))) %>%
  dplyr::slice_head(n = 30) %>%
  dplyr::mutate(
    # After the filter every row has |log2FC| > 1, so the sign decides the label
    Regulation = dplyr::if_else(log2FoldChange > 1, "Upregulated", "Downregulated")
  )


# Horizontal bar chart with positive and negative bars
regulation_fill <- c("Upregulated" = "steelblue", "Downregulated" = "tomato")
ggplot(
  top30_fc,
  aes(x = reorder(Gene, log2FoldChange), y = log2FoldChange, fill = Regulation)
) +
  geom_col() +
  coord_flip() + # genes end up on the vertical axis
  scale_fill_manual(values = regulation_fill) +
  scale_y_continuous(breaks = seq(-8, 4, by = 1)) + # adjust the range if needed
  labs(
    title = "The 30 most prominent DEGs between non-lesion and lesion PsA (GSE205748)",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Regulation"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_text(color = "black", face = "bold", size = 9)
  )

# --- The 20 significant genes with the largest positive log2 fold change ---
up_20genes <- expresse %>%
  dplyr::filter(padj < 0.05, log2FoldChange > 1) %>%
  dplyr::arrange(dplyr::desc(log2FoldChange)) %>%
  dplyr::slice_head(n = 20) %>%
  dplyr::pull(Row.names)

# --- Long-format expression restricted to PsA skin samples and those genes ---
expr_datas_filtrado_up <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% up_20genes
  )

# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_up <- expr_datas_filtrado_up %>%
  dplyr::group_by(Gene, `tissue type:ch1`) %>%
  dplyr::summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = 'drop'
  )

# --- Only the uninvolved-skin group is plotted ---
dados_para_plotar <- expr_datasa_up %>%
  dplyr::filter(`tissue type:ch1` == "Psoriatic arthritis skin uninvolved")

# --- Horizontal bar chart, genes ordered by mean expression ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, mean_expression))
) +
  geom_col(fill = "steelblue", color = "black", width = 0.7) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 12)
  ) +
  labs(
    title = "Average Expression of the Main Upregulated Genes in Psoriatic arthritis skin uninvolved",
    subtitle = "For non-lesion PsA vs lesion PsA (GSE205748)",
    x = "Average Expression in the Group 'skin uninvolved'",
    y = "Gene"
  )

# --- The 20 significant genes with the most negative log2 fold change ---
down_20genes <- expresse %>%
  dplyr::filter(padj < 0.05, log2FoldChange < -1) %>%
  dplyr::arrange(log2FoldChange) %>% # most negative first
  dplyr::slice_head(n = 20) %>%
  dplyr::pull(Row.names)

# --- Long-format expression restricted to PsA skin samples and those genes ---
expr_datas_filtrado_down <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% down_20genes
  )

# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_down <- expr_datas_filtrado_down %>%
  dplyr::group_by(Gene, `tissue type:ch1`) %>%
  dplyr::summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = 'drop'
  )

# --- Only the uninvolved-skin group is plotted ---
dados_para_plotar <- expr_datasa_down %>%
  dplyr::filter(`tissue type:ch1` == "Psoriatic arthritis skin uninvolved")

# --- Horizontal bar chart, genes ordered by decreasing mean expression ---
ggplot(
  dados_para_plotar,
  aes(x = mean_expression, y = reorder(Gene, -mean_expression))
) +
  geom_col(fill = "red", color = "black", width = 0.7) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 12)
  ) +
  labs(
    title = "Average Expression of the Main Downregulated Genes in Psoriatic arthritis skin uninvolved",
    subtitle = "For non-lesion PsA vs lesion PsA (GSE205748)",
    x = "Average Expression in the Group 'skin uninvolved'",
    y = "Gene"
  )

# --- Build the count matrix of the DEGs ---
# Strict thresholds: padj < 0.05 and |log2FC| > 1
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange > 1))
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange < -1))
genes_interesse <- c(up_genes, down_genes)

# Keep only the PsA skin samples (lesion and uninvolved) and the DEGs
expr_filtrado <- expr_datas %>%
  dplyr::filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin uninvolved", "Psoriatic arthritis skin lesion"),
    Gene %in% genes_interesse
  )

# Back to wide format: one row per gene, one column per sample
matriz_contagem <- expr_filtrado %>%
  dplyr::select(Gene, Sample, Expression) %>%
  tidyr::pivot_wider(names_from = Sample, values_from = Expression, values_fill = 0) %>%
  tibble::column_to_rownames(var = "Gene")


# Restrict `sample_info` to the samples present in the final matrix so the
# annotation corresponds exactly to the heatmap data.
in_matrix <- rownames(sample_info) %in% colnames(matriz_contagem)
sample_info_filtrado <- sample_info[in_matrix, , drop = FALSE]


# The 20 genes with the highest mean value across the retained samples
media_dos_genes <- rowMeans(matriz_contagem)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)
top_20_genes_por_media <- head(names(media_ordenada), 20)
heatmapData_top20 <- as.matrix(matriz_contagem[top_20_genes_por_media, ])


# Heatmap annotation built from the already filtered sample information
annotation_data <- data.frame(
  Group = sample_info_filtrado$Group,
  row.names = rownames(sample_info_filtrado)
)

# Put the matrix columns in the same order as the annotation rows
heatmapData_top20 <- heatmapData_top20[, rownames(annotation_data)]


# --- Heatmap generation with ComplexHeatmap ---

library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)

# Row-wise z-score: scale() standardises columns, hence the double transpose
mat_scaled <- t(scale(t(heatmapData_top20)))

# Column annotation bar coloured by group
group_colours <- c("non.lesion.PsA" = "#1f77b4", "lesion.PsA" = "#d62728")
ha_col <- HeatmapAnnotation(
  Group = annotation_data$Group,
  col = list(Group = group_colours)
)

# Reversed RdBu palette anchored at z = -2, 0 and 2
rdbu_reversed <- rev(RColorBrewer::brewer.pal(3, "RdBu"))
cores <- colorRamp2(c(-2, 0, 2), rdbu_reversed)

# Draw the heatmap with one column slice per group
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  column_split = annotation_data$Group,
  cluster_columns = TRUE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed DEGs (non-lesion vs. lesion PsA GSE205748)",
  heatmap_legend_param = list(title = "Z-score")
)

# Differentially expressed genes: padj < 0.05 and |log2FC| > 1
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)

# Convert the results subset to a plain data frame
de_genes <- as.data.frame(de_genes)

# Keep only the fold change and adjusted p-value columns. Base indexing is
# used so the call cannot be captured by another package's `select`.
de_genes <- de_genes[, c("log2FoldChange", "padj"), drop = FALSE]

# Gene symbols are taken from the row names of `de_genes`
gene_symbols <- rownames(de_genes)

# Map the gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a regular column so they can serve as the merge key
de_genes$Symbol <- rownames(de_genes)

# Left join: symbols without an Entrez ID are kept with ENTREZID = NA
de_genes <- merge(de_genes, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)

# Keep exactly one row per gene symbol. De-duplicating on ENTREZID instead
# would treat every NA as the same ID and silently drop all but the first
# symbol that could not be mapped.
de_genes <- de_genes[!duplicated(de_genes$Symbol), ]

# Restrict to the genes that are present in the heatmap count matrix
de_genes <- de_genes %>%
  dplyr::filter(Symbol %in% rownames(matriz_contagem))
                               

# Install GOSemSim only when it is missing, so that rendering the report does
# not attempt a reinstall on every run.
if (!requireNamespace("GOSemSim", quietly = TRUE)) {
  BiocManager::install("GOSemSim")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Installing package(s) 'GOSemSim'
## Warning in install.packages(...): installation of package 'GOSemSim' had
## non-zero exit status
## Installation paths not writeable, unable to update packages
##   path: /usr/local/lib/R/library
##   packages:
##     boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
##     nlme, nnet, rpart, spatial, survival
##   path: /usr/local/lib/R/site-library
##   packages:
##     annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
##     BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
##     bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
##     classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
##     ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
##     cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
##     DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
##     enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
##     future, future.apply, gargle, GDCRNATools, genefilter, generics,
##     GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
##     ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
##     gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
##     gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
##     HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
##     KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
##     littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
##     MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
##     mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
##     modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
##     partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
##     pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
##     purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
##     ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
##     reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
##     rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
##     scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
##     statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
##     svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
##     testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
##     tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
##     xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
##   'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
##   'RSQLite', 'xfun', 'xml2'
library(GOSemSim)
  
# Install enrichplot only when it is missing, so that rendering the report
# does not attempt a reinstall on every run.
if (!requireNamespace("enrichplot", quietly = TRUE)) {
  BiocManager::install("enrichplot")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Installing package(s) 'enrichplot'
## also installing the dependencies 'DOSE', 'GOSemSim'
## Warning in install.packages(...): installation of package 'GOSemSim' had
## non-zero exit status
## Warning in install.packages(...): installation of package 'DOSE' had non-zero
## exit status
## Warning in install.packages(...): installation of package 'enrichplot' had
## non-zero exit status
## Installation paths not writeable, unable to update packages
##   path: /usr/local/lib/R/library
##   packages:
##     boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
##     nlme, nnet, rpart, spatial, survival
##   path: /usr/local/lib/R/site-library
##   packages:
##     annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
##     BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
##     bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
##     classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
##     ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
##     cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
##     DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
##     enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
##     future, future.apply, gargle, GDCRNATools, genefilter, generics,
##     GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
##     ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
##     gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
##     gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
##     HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
##     KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
##     littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
##     MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
##     mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
##     modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
##     partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
##     pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
##     purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
##     ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
##     reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
##     rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
##     scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
##     statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
##     svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
##     testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
##     tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
##     xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
##   'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
##   'RSQLite', 'xfun', 'xml2'
library(enrichplot)
# Install STRINGdb only when it is missing, so that rendering the report does
# not call the installer on every run.
if (!requireNamespace("STRINGdb", quietly = TRUE)) {
  BiocManager::install("STRINGdb")
}
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://p3m.dev/cran/__linux__/jammy/latest
## Bioconductor version 3.20 (BiocManager 1.30.26), R 4.4.1 (2024-06-14)
## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'STRINGdb'
## Installation paths not writeable, unable to update packages
##   path: /usr/local/lib/R/library
##   packages:
##     boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
##     nlme, nnet, rpart, spatial, survival
##   path: /usr/local/lib/R/site-library
##   packages:
##     annotate, AnnotationDbi, ape, aplot, askpass, BH, Biobase, BiocFileCache,
##     BiocGenerics, BiocManager, BiocParallel, BiocVersion, biomaRt, Biostrings,
##     bit, bit64, bitops, Boruta, broom, bslib, C50, car, caret, checkmate, chk,
##     classInt, cli, clock, clue, clusterProfiler, colorspace, commonmark,
##     ComplexHeatmap, corrplot, cowplot, cpp11, credentials, crosstalk,
##     cutpointr, dbplyr, dbscan, DelayedArray, DEoptimR, Deriv, DESeq2, devtools,
##     DiceKriging, diffobj, doBy, docopt, DOSE, downloader, DT, dtplyr, edgeR,
##     enrichplot, entropy, evaluate, fastmatch, fgsea, fontawesome, forcats, fs,
##     future, future.apply, gargle, GDCRNATools, genefilter, generics,
##     GenomeInfoDb, GenomeInfoDbData, GenomicDataCommons, GenomicRanges, gert,
##     ggforce, ggfun, ggnewscale, ggplot2, ggplotify, ggpubr, ggraph, ggtree, gh,
##     gld, glmnet, globals, glue, GO.db, googledrive, googlesheets4, GOSemSim,
##     gower, GPArotation, gplots, graph, graphlayouts, gtable, hardhat, haven,
##     HDO.db, here, hms, httpuv, httr2, IRanges, jpeg, jsonlite, KEGGgraph,
##     KEGGREST, keras, KMsurv, knitr, labelled, later, lava, lavaan, lgr, limma,
##     littler, lme4, lmom, locfit, lubridate, magrittr, markdown, MatchIt,
##     MatrixGenerics, MatrixModels, matrixStats, maxstat, mice, mime, miniUI,
##     mlbench, mlr3, mlr3learners, mlr3measures, mlr3misc, mlr3pipelines,
##     modeltools, multcomp, mvtnorm, networkD3, nloptr, org.Hs.eg.db, party,
##     partykit, patchwork, pathview, pbkrtest, pillar, pkgbuild, pkgdown,
##     pkgload, plotly, pROC, processx, prodlim, progressr, PRROC, ps, psych,
##     purrr, quantmod, quantreg, questionr, qvalue, R.cache, R.oo, R.utils, R6,
##     ragg, ranger, Rcpp, RcppArmadillo, RcppTOML, RCurl, readxl, recipes,
##     reshape, rgl, Rgraphviz, rlang, rmarkdown, robustbase, roxygen2, rprojroot,
##     rsq, rstatix, rstudioapi, rversions, rvest, S4Arrays, S4Vectors, sass,
##     scales, scatterpie, sessioninfo, shadowtext, shiny, sp, SparseArray,
##     statmod, stringi, stringr, styler, SummarizedExperiment, survminer, sva,
##     svglite, sys, systemfonts, TCGAbiolinks, TCGAbiolinksGUI.data, tensorflow,
##     testthat, textshaping, tfruns, TH.data, tibble, timeDate, tinytex, treeio,
##     tzdb, UCSC.utils, usethis, utf8, VIM, vroom, waldo, withr, xgboost, XML,
##     xts, XVector, yulab.utils, zeallot, zip, zlibbioc, zoo
## Old packages: 'bbotk', 'Cubist', 'curl', 'data.table', 'ggsci', 'Hmisc',
##   'igraph', 'openssl', 'parallelly', 'promises', 'reformulas', 'reticulate',
##   'RSQLite', 'xfun', 'xml2'
library(STRINGdb)

# Protein-protein interaction workflow with STRINGdb.
# Create the STRINGdb client for species 9606 (NCBI taxonomy ID of Homo
# sapiens). NOTE(review): score_threshold = 400 is presumably the minimum
# STRING combined score kept, and input_directory = "" presumably makes
# STRINGdb use a temporary folder - confirm against the STRINGdb manual.
string_db <- STRINGdb$new(version = "12", species = 9606, score_threshold = 400, input_directory = "")

options(timeout = 600)  # raise R's global download timeout to 600 s (10 minutes)

# Map the gene symbols in column "Symbol" of de_genes (defined earlier in
# the file) to STRING identifiers; rows that cannot be mapped are dropped.
mapped_genes <- string_db$map(de_genes, "Symbol", removeUnmappedRows = TRUE)
## Warning:  we couldn't map to STRING 2% of your identifiers
# Retrieve the interactions among the mapped STRING identifiers
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# Visualise the network using igraph or other visualisation tools
library(igraph)
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:rtracklayer':
## 
##     blocks, path
## The following object is masked from 'package:BiocIO':
## 
##     path
## The following object is masked from 'package:Biostrings':
## 
##     union
## The following object is masked from 'package:XVector':
## 
##     path
## The following object is masked from 'package:circlize':
## 
##     degree
## The following objects are masked from 'package:topGO':
## 
##     algorithm, graph
## The following objects are masked from 'package:graph':
## 
##     degree, edges, intersection, union
## The following object is masked from 'package:clusterProfiler':
## 
##     simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:GenomicRanges':
## 
##     union
## The following object is masked from 'package:IRanges':
## 
##     union
## The following object is masked from 'package:S4Vectors':
## 
##     union
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Build an undirected igraph object from the STRING interaction table and
# draw it once without any grouping information.
g <- igraph::graph_from_data_frame(d = interactions, directed = FALSE)
plot(g)

# Community detection with the Louvain algorithm
clusters <- igraph::cluster_louvain(graph = g)

# Redraw the network, colouring every vertex by its community membership
plot(g, vertex.color = clusters$membership)

# Perform GO enrichment analysis
# Biological Process
# Ensure you have the packages installed
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(dplyr) # For data manipulation


# GO over-representation analysis (Biological Process ontology) on the
# STRING-mapped gene symbols. "BH" (Benjamini-Hochberg) is the FDR correction.
enrich_result <- enrichGO(
  gene          = mapped_genes$Symbol,
  OrgDb         = org.Hs.eg.db,
  keyType       = "SYMBOL",
  ont           = "BP", # one of "BP", "MF" or "CC"
  pAdjustMethod = "BH",
  pvalueCutoff  = 0.05,
  qvalueCutoff  = 0.2
)

# Bubble ("lollipop") chart of the most significant GO terms; a message is
# printed instead when the enrichment returned nothing.
if (!is.null(enrich_result) && nrow(as.data.frame(enrich_result)) > 0) {
  # Keep the 15 terms with the smallest adjusted p-value
  plot_data <- as.data.frame(enrich_result)
  plot_data <- head(arrange(plot_data, p.adjust), 15)

  # Numeric Y position per term (reversed so the most significant term is
  # drawn at the top); needed because geom_segment works on a continuous axis.
  plot_data$y_pos <- rev(seq_len(nrow(plot_data)))

  ggplot(plot_data, aes(x = Count, y = y_pos)) +
    # Horizontal "thread" from x = 0 to the gene count of each term
    geom_segment(
      aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
      color = "gray", linewidth = 0.5
    ) +
    # Bubble: size encodes the gene count, colour the adjusted p-value
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.8) +
    labs(
      title = "GO Enrichment Analysis (Biological Processes)",
      x = "Genes Counting",
      y = "GO Terms (Biological Processes)"
    ) +
    # Put the GO descriptions back on the numeric Y axis
    scale_y_continuous(breaks = plot_data$y_pos, labels = plot_data$Description) +
    scale_size_area(max_size = 15, name = "Gene Counting") +
    # Log10 colour scale for the FDR, labelled as powers of ten
    scale_color_gradientn(
      colors = c("darkblue", "steelblue", "lightblue", "lightgreen", "yellow", "orange", "darkred"),
      name = "FDR",
      trans = "log10",
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x))
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 14, face = "bold"),
      axis.text.y = element_text(size = 12, face = "bold"),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(), # the segments replace the horizontal grid
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)
    )
} else {
  print("No significantly enriched GO terms were found with the provided criteria.")
}

# Standard enrichplot views of the GO result (bar plot, gene-concept network,
# heat plot). They are drawn only when the enrichment returned at least one
# term, mirroring the guard used for the bubble chart: calling these
# functions on a NULL or empty result would stop the script with an error.
# print() is required because the plots are created inside a braced block.
if (!is.null(enrich_result) && nrow(as.data.frame(enrich_result)) > 0) {
  print(barplot(enrich_result, showCategory = 10, title = "GO Enrichment Analysis"))
  ## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
  ## ✖ Problematic argument:
  ## • by = x
  ## ℹ Did you misspell an argument name?
  ## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
  ## ℹ Please use tidy evaluation idioms with `aes()`.
  ## ℹ See also `vignette("ggplot2-in-packages")` for more information.
  ## ℹ The deprecated feature was likely used in the enrichplot package.
  ##   Please report the issue at
  ##   <https://github.com/GuangchuangYu/enrichplot/issues>.
  ## This warning is displayed once every 8 hours.
  ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
  ## generated.

  # Network graph of enriched terms
  print(cnetplot(enrich_result, showCategory = 10))
  ## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
  ## ℹ Please use tidy evaluation idioms with `aes()`
  ## ℹ The deprecated feature was likely used in the enrichplot package.
  ##   Please report the issue at
  ##   <https://github.com/GuangchuangYu/enrichplot/issues>.
  ## This warning is displayed once every 8 hours.
  ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
  ## generated.
  ## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
  ## increasing max.overlaps

  # Heatmap
  print(heatplot(enrich_result, showCategory = 10))
}

# KEGG pathway over-representation analysis on the Entrez IDs of the
# STRING-mapped genes (organism code "hsa", Benjamini-Hochberg correction).
ekegg <- enrichKEGG(
  gene = mapped_genes$ENTREZID,
  organism = "hsa",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05
)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
# Bubble chart in the same style as the GO chart; a message is printed
# instead when no pathway passed the cut-offs.
if (!is.null(ekegg) && nrow(as.data.frame(ekegg)) > 0) {
  # The 15 pathways with the smallest adjusted p-value (FDR)
  plot_data_kegg <- as.data.frame(ekegg)
  plot_data_kegg <- head(arrange(plot_data_kegg, p.adjust), 15)

  # Numeric Y position per pathway, reversed so the most significant one
  # ends up at the top of the chart
  plot_data_kegg$y_pos <- rev(seq_len(nrow(plot_data_kegg)))

  ggplot(plot_data_kegg, aes(x = Count, y = y_pos)) +
    # Connecting line from the Y axis to each bubble
    geom_segment(
      aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
      color = "gray80", linewidth = 0.6
    ) +
    # Bubble: size encodes the gene count, colour the adjusted p-value
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.9) +
    # Axis labels and title
    labs(
      title = "KEGG Pathway Enrichment Analysis",
      x = "Gene count",
      y = "KEGG Pathway"
    ) +
    # Put the pathway descriptions back on the numeric Y axis
    scale_y_continuous(
      breaks = plot_data_kegg$y_pos,
      labels = plot_data_kegg$Description
    ) +
    # Bubble-size legend with breaks at the quartiles and maximum of Count
    scale_size_area(
      max_size = 18,
      name = "Gene count",
      breaks = unique(round(quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0)))),
      labels = unique(round(quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0))))
    ) +
    # Log10 colour scale for the FDR, limited to the plotted range and
    # labelled as powers of ten
    scale_color_gradientn(
      colors = c("darkblue", "steelblue", "lightblue", "lightgreen", "yellow", "orange", "darkred"),
      name = "FDR",
      trans = "log10",
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x)),
      limits = range(plot_data_kegg$p.adjust)
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5), # centred title
      axis.title = element_text(size = 14, face = "bold"),
      axis.text.y = element_text(size = 12, face = "bold"), # pathway names
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(), # the segments replace the horizontal grid
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2)
    )
} else {
  print("No significantly enriched KEGG pathways were found with the criteria provided.")
}

# Standard enrichplot views of the KEGG result (bar plot, gene-concept
# network, heat plot). They are drawn only when at least one pathway was
# enriched, mirroring the guard used for the bubble chart: calling these
# functions on a NULL or empty result would stop the script with an error.
# print() is required because the plots are created inside a braced block.
if (!is.null(ekegg) && nrow(as.data.frame(ekegg)) > 0) {
  print(barplot(ekegg, showCategory = 10, title = "KEGG Enrichment Analysis"))
  ## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
  ## ✖ Problematic argument:
  ## • by = x
  ## ℹ Did you misspell an argument name?

  # Network graph of enriched terms
  print(cnetplot(ekegg, showCategory = 10))
  ## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
  ## increasing max.overlaps

  # Heatmap
  print(heatplot(ekegg, showCategory = 10))
}

7. Transcription factors in the analysis between non-lesion PsA vs. lesion PsA

# Load the necessary packages
library(GenomicFeatures)
## 
## Attaching package: 'GenomicFeatures'
## The following object is masked from 'package:topGO':
## 
##     genes
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
library(JASPAR2020)
library(TFBSTools)
library(SummarizedExperiment)
library(motifmatchr)

# Build the promoter regions (2 kb upstream) of the genes of interest.

# Use the UCSC hg38 knownGene transcript database
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Exons grouped by gene (one list element per gene ID)
exons_by_gene <- exonsBy(txdb, by = "gene")

# Collapse the exons of each gene into its overall span (first to last exon position)
genes_info <- range(exons_by_gene)

# All transcripts of the database.
# NOTE(review): transcripts_info is not used in the visible part of this section - confirm it is needed.
transcripts_info <- transcripts(txdb)

# Map the symbols of the STRING-mapped genes to Entrez IDs (first match when a symbol maps to several IDs)
gene_entrez <- mapIds(org.Hs.eg.db, keys = mapped_genes$Symbol, column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the genes of interest.
# NOTE(review): assumes the names of genes_info are Entrez gene IDs - confirm for this TxDb.
promoters_info <- subset(genes_info, names(genes_info) %in% gene_entrez)

# Define promoter regions: 2000 bp upstream and 0 bp downstream of the start of each gene span
promoters <- promoters(promoters_info, upstream = 2000, downstream = 0)
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 1 out-of-bound range located on sequence
##   chr2_GL383522v1_alt. Note that ranges located on a sequence whose
##   length is unknown (NA) or on a circular sequence are not considered
##   out-of-bound (use seqlengths() and isCircular() to get the lengths and
##   circularity flags of the underlying sequences). You can use trim() to
##   trim these ranges. See ?`trim,GenomicRanges-method` for more
##   information.
# Keep only the standard chromosomes (alternate and unplaced sequences are pruned)
promoters <- keepStandardChromosomes(promoters, pruning.mode = "coarse")

# Trim any region that extends beyond the ends of its chromosome
promoters <- trim(promoters)

# Flatten the per-gene list of promoter ranges into a single GRanges object
promoters_gr <- unlist(promoters)

# Load the human motif matrices from the JASPAR 2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))

# Lookup table linking each motif ID to the name stored in its matrix
# (the transcription factor the motif belongs to)
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = sapply(motifs, function(pfm) pfm@name),
  stringsAsFactors = FALSE
)

# Scan the promoter regions for matches to every motif
motifHits <- matchMotifs(
  pwms = motifs,
  subject = promoters_gr,
  genome = BSgenome.Hsapiens.UCSC.hg38
)

# Number of overlaps between each promoter region and the match result
motif_counts <- countOverlaps(query = promoters_gr, subject = motifHits)

# Store the counts as a metadata column of the promoter ranges
promoters_gr$motif_counts <- motif_counts

# Summary of the per-promoter counts
summary(promoters_gr$motif_counts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4605  1.0000  1.0000
# ---- Per-motif summary of the promoter matches ----
#
# The previous version summed the match matrix by ROW (one value per
# promoter region), truncated the motif IDs to that length and paired the two
# vectors by position, so every reported "TF" was attached to the count of an
# unrelated promoter. The counts are now taken per COLUMN, i.e. per motif.

# Names of the match result.
# NOTE(review): these look like the promoter/gene names, not motif names - confirm.
motif_names <- names(motifHits)

# Match matrix taken from the first assay of the result.
# NOTE(review): assumes the first assay holds the match matrix with one row
# per promoter region and one column per motif - confirm.
motif_data <- assays(motifHits)[[1]]

# Dimensions: promoter regions x motifs
dim(motif_data)
## [1] 152 633

# The column names identify the motifs (they are joined to motif_info$motif_id below)
tf_names <- colnames(motif_data)

# Per-motif count: column sums over the promoter regions
motif_counts <- colSums(motif_data)

# One count per motif - fail loudly instead of silently truncating
stopifnot(length(tf_names) == length(motif_counts))

# Motif ID + count table (the column keeps its historical name "tf_name")
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = as.numeric(motif_counts),
  stringsAsFactors = FALSE
)

# Sort by count and keep the most frequent motifs
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 121) # Adjust the number as required

# Keep the motif ID in its own column so it survives the merge below
top_tf_summary$motif_id <- top_tf_summary$tf_name

# Attach the TF name of every motif; the name column coming from motif_info
# is suffixed ".y" because it clashes with the key column of top_tf_summary
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")

# Reorder and rename the columns
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
colnames(motif2) <- c("motif_id", "tf_name", "motif_count")

# Sort by frequency (motif_count) in descending order; dplyr is named
# explicitly because other attached packages also export desc()
motif2 <- motif2 %>% dplyr::arrange(dplyr::desc(motif_count))

# NOTE: a table rendered with the earlier per-promoter counts is stale and
# must be regenerated by re-running this section.
print(motif2)
##     motif_id             tf_name motif_count
## 1      CCL20                PAX4         231
## 2      CXCL8              BARHL2         205
## 3      CCRL2              PHOX2A         201
## 4       CCR7                SOX9         195
## 5       MMP9               HNF1A         194
## 6    TNFSF10                PDX1         185
## 7      CCND1             NEUROG2         183
## 8       GJB6            MAX::MYC         176
## 9      CXCL2         GATA1::TAL1         173
## 10       PI3                LHX6         173
## 11      CD83                 RAX         173
## 12      IFI6               NR2C2         169
## 13      FUT2           MAF::NFE2         167
## 14     IFIH1              POU4F2         165
## 15       NMI               PITX3         165
## 16      DKK1               FOSL2         164
## 17   TNFRSF9                 DBP         164
## 18      PRF1                 MSC         164
## 19       GEM              TCF7L2         162
## 20      GZMA              SREBF1         162
## 21      GZMK               THAP1         162
## 22     PINK1                 SRF         161
## 23    TGFBR3               TFCP2         161
## 24      MEFV               GRHL1         161
## 25      RPL7                NFIX         161
## 26     KLRB1                FLI1         160
## 27   EFCAB13          TAL1::TCF3         158
## 28     SAMD9                 MLX         158
## 29     PLIN5               HNF1B         156
## 30   PLA2G4D                ESR2         153
## 31     PPARG               MEOX1         153
## 32      RORC                NFIA         153
## 33     DDX60                MSX1         152
## 34     IL12B               CREB3         151
## 35      MCAM                GCM1         151
## 36    FAXDC2               PPARG         149
## 37     MUCL1         RORA(var.2)         149
## 38       IL4                ALX3         149
## 39      SGK1                 SP4         149
## 40      CCR2              ZBTB7B         148
## 41     IL17F                PBX1         147
## 42   SLC7A11         JUND(var.2)         147
## 43    TRIM22               FOXD1         146
## 44     APOL6              ZBTB18         146
## 45      IL18                ELF5         145
## 46      JUND                ETV6         145
## 47      IRS1                 EN2         143
## 48     ITGAL                ESX1         143
## 49  SERPINB1               PLAG1         142
## 50      OLR1                IRF9         142
## 51     RGPD6              ZBTB7C         142
## 52      IL37                ELK4         141
## 53     DDIT3              NFATC2         140
## 54    S100A8               NHLH1         139
## 55     TBX21               FOXP2         139
## 56      SOST                JDP2         139
## 57      PER1               KLF13         139
## 58     BGLAP              NKX6-1         139
## 59      SOD2                TBX2         139
## 60       LYZ               FOXO3         138
## 61     ABCD2                 FOS         138
## 62      GJB2              ZBTB33         138
## 63      NOD2                PAX7         138
## 64     RSAD2               PROP1         138
## 65     CTLA4          NFIC::TLX1         137
## 66      MMP7               HINFP         137
## 67     DUSP4               INSM1         137
## 68      CSF2                 REL         136
## 69       FOS                 JUN         136
## 70    IL36RN        STAT1::STAT2         136
## 71      BMP2                SPIC         136
## 72      SYT1               TFAP4         136
## 73  PPARGC1A                PAX6         135
## 74     CMTM2                RELA         135
## 75     HYAL4          JUN(var.2)         135
## 76     IL2RA                SHOX         135
## 77      CRB1               FOXH1         134
## 78      XAF1               MIXL1         134
## 79      IL21             NEUROD2         134
## 80      GBP5                RORA         133
## 81    SLC51B           RXRA::VDR         133
## 82     IL17A                ELF4         133
## 83     SCN1A              NKX6-2         133
## 84     IL23R         NR1H2::RXRA         132
## 85     IL23A         JDP2(var.2)         132
## 86      MMP1                 GSC         131
## 87      NOS2                IRF8         131
## 88     APOL1                OTX1         131
## 89       ALB         ARNT::HIF1A         129
## 90    CYP1A1                CTCF         128
## 91      IL22                 ISX         128
## 92      CD69                VSX2         128
## 93       EGF               NR4A2         127
## 94     GATA3              POU2F2         127
## 95    SLC7A5                LBX2         127
## 96      CD3E               PRRX1         127
## 97     GPR35                IRF1         126
## 98     SPON2                IRF2         126
## 99    ZNF415                MYF6         126
## 100     GZMB              SREBF2         125
## 101    CAPS2                NOTO         125
## 102    IFI16                KLF5         123
## 103   IL1F10                MSX2         123
## 104     CD63                VSX1         123
## 105     JDP2               RREB1         122
## 106     CTSK             ZNF354C         122
## 107     TYMP          RARA::RXRA         122
## 108     NKG7                HSF1         122
## 109     GBP1 SMAD2::SMAD3::SMAD4         122
## 110    CXCR2             BHLHE41         122
## 111     CD38               VENTX         122
## 112    ACKR2                 SRY         121
## 113     ICOS                 YY1         121
## 114      GPT               STAT1         121
## 115     FRZB               MEF2C         121
## 116     ACP7               FOXI1         120
## 117    CD274               STAT3         120
## 118     IL1B              POU6F1         120
## 119    FOSL1                ZIC3         120
## 120    NAMPT               FOXF2         119
## 121      LEP               FOXL1         118

8. Comparison between samples from patients with skin lesions and control samples

# Wald test to obtain contrast-specific results
dsd <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

# Group levels of THIS fitted object. They must be read before building the
# contrast; otherwise results() would silently reuse a `groups` vector left
# over from an earlier comparison (or fail if none exists).
groups <- levels(colData(dsd)$Group)

# Contrast: second level vs first level of Group, FDR-adjusted p-values
r <- results(dsd, contrast = c("Group", groups[2], groups[1]), alpha = 0.05, pAdjustMethod = "fdr")

df <- as.data.frame(r)

# Filter only significant genes (adjusted p < 0.05 and |log2FC| >= 1)
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)

# Base-graphics volcano and MD plots of the significant genes.
# The palette and the graphical parameters are changed for these two plots
# and restored at the end.
old.pal <- palette(c("#00BFFF", "#FF3030")) # low-hi colors
old.par <- par(mar = c(4, 4, 2, 1), cex.main = 1.5)

# volcano plot
with(sig_genes, {
  plot(log2FoldChange, -log10(padj), main = paste(groups[2], "vs", groups[1]),
       xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5)
  # Labels must come from the same object as the coordinates: using the row
  # names of the full result table here attached wrong gene names to the points.
  text(log2FoldChange, -log10(padj), labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
# Colour the points by direction: palette index 1 = down, 2 = up
with(sig_genes,
     points(log2FoldChange, -log10(padj), pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1))
legend("bottomleft", title = paste("Padj<", 0.05, sep = ""), legend = c("down", "up"), pch = 20, col = 1:2)

# MD plot (mean of normalized counts vs log2 fold change), significant genes only
with(sig_genes, {
  plot(log10(baseMean), log2FoldChange,
       main = paste(groups[2], "vs", groups[1]),
       xlab = "log10(mean of normalized counts)", ylab = "log2FoldChange",
       pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1)
  text(log10(baseMean), log2FoldChange, labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
legend("bottomleft", title = paste("Padj<", 0.05, sep = ""), legend = c("down", "up"), pch = 20, col = 1:2)
abline(h = 0)

palette(old.pal) # restore palette
par(old.par)     # restore graphical parameters

# Add gene symbols to the points on the graph
# Install the packages if you do not already have them
library(ggplot2)
library(ggrepel)

#' Volcano plot of differential-expression results
#'
#' Classifies every gene as "Up", "Down" or "NS" (not significant) from its
#' adjusted p-value and log2 fold change, and draws a labelled volcano plot.
#'
#' @param res Result table with columns `log2FoldChange` and `padj` and gene
#'   names as row names. Anything accepted by `as.data.frame()` works.
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (default 0.05).
#' @param lfc_cutoff Absolute log2 fold-change threshold (default 1).
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot", padj_cutoff = 0.05, lfc_cutoff = 1) {
  # Work on a plain data frame so column assignment and ggplot behave the
  # same whatever table class is passed in
  res <- as.data.frame(res)

  # Genes with a missing p-value or fold change can never be significant;
  # handling NA explicitly avoids NA values in the logical index below
  significant <- !is.na(res$padj) & !is.na(res$log2FoldChange) &
    res$padj < padj_cutoff

  res$group <- rep("NS", nrow(res)) # rep() keeps a zero-row table valid
  res$group[significant & res$log2FoldChange > lfc_cutoff] <- "Up"
  res$group[significant & res$log2FoldChange < -lfc_cutoff] <- "Down"

  # Only significant genes are labelled
  res$label <- ifelse(res$group != "NS", rownames(res), NA_character_)

  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    # Dashed guides at the significance and fold-change thresholds
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")) +
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf) +
    theme_minimal() +
    labs(title = title, x = "log2 Fold Change", y = "-log10 adjusted p-value", color = "Regulation")
}

# Example of function usage
plotVolcano(r, paste(groups[2], "vs", groups[1]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 280 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Genes upregulated (adjusted p < 0.05 and log2FC >= 1)
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))

# Genes downregulated (adjusted p < 0.05 and log2FC <= -1)
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))

# All DEGs with adjusted p < 0.05 and |log2FC| >= 1
de_genes <- c(up_genes, down_genes)

# Expression values of the DEGs; drop = FALSE keeps the table shape even
# when a single gene is selected
expr_datasa <- as.data.frame(expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE])

# Convert expression data to long format for ggplot2 and attach the
# phenotype information of every sample
expr_datas <- expr_datasa
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- pivot_longer(expr_datas, cols = -Gene, names_to = "Sample", values_to = "Expression")
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")

# Result rows ordered by adjusted p-value (at most the first 429), joined by
# row name with the expression values of the DEGs. head() is used instead of
# [1:429] so that a shorter result table does not produce NA indices.
expresse <- r[head(order(r$padj), 429), ]
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)


library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)

# Plain data frame of the DESeq2 results, with the gene names moved from the
# row names into a regular column so they survive the dplyr verbs below.
r_df <- as.data.frame(r)
r_df_com_genes <- rownames_to_column(r_df, var = "Gene")

# The 30 significant genes with the largest absolute fold change, each
# tagged with its direction of regulation
top30_fc <- dplyr::filter(r_df_com_genes, padj < 0.05 & abs(log2FoldChange) > 1)
top30_fc <- dplyr::arrange(top30_fc, desc(abs(log2FoldChange)))
top30_fc <- dplyr::slice(top30_fc, 1:30)
top30_fc <- dplyr::mutate(
  top30_fc,
  Regulation = case_when(
    log2FoldChange > 1 ~ "Upregulated",
    log2FoldChange < -1 ~ "Downregulated"
  )
)

# Diverging bar chart: positive and negative fold changes on either side of zero
ggplot(top30_fc, aes(x = reorder(Gene, log2FoldChange), y = log2FoldChange, fill = Regulation)) +
  geom_col() +
  coord_flip() + # genes on the vertical axis
  labs(
    title = "The 30 most prominent DEGs between lesion PsA and control (GSE205748)",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Regulation"
  ) +
  scale_y_continuous(breaks = seq(-8, 4, by = 1)) + # adjust the limits if necessary
  scale_fill_manual(values = c("Upregulated" = "steelblue", "Downregulated" = "tomato")) +
  theme_bw() +
  theme(
    axis.text.y = element_text(color = "black", face = "bold", size = 9)
  )

# --- The 20 genes with the largest positive fold change ---
up_20genes <- expresse %>%
  filter(padj < 0.05, log2FoldChange > 1) %>%
  arrange(desc(log2FoldChange)) %>%
  head(20) %>%
  pull(Row.names)

# --- Expression values of those genes in the lesion and control samples ---
expr_datas_filtrado_up <- expr_datas %>%
  filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin"),
    Gene %in% up_20genes
  )

# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_up <- expr_datas_filtrado_up %>%
  group_by(Gene, `tissue type:ch1`) %>%
  summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )

# --- Data for the chart: averages of the lesion group only ---
dados_para_plotar <- expr_datasa_up %>%
  filter(`tissue type:ch1` == "Psoriatic arthritis skin lesion")

# --- Horizontal bar chart, genes ordered by mean expression ---
ggplot(dados_para_plotar, aes(x = mean_expression, y = reorder(Gene, mean_expression))) +
  geom_col(fill = "steelblue", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Upregulated Genes in Psoriatic arthritis skin lesion", # clearer title
    subtitle = "For lesion PsA vs control (GSE205748)",
    x = "Average Expression in the Group 'skin lesion'", # clearer axis label
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.y = element_text(size = 12),
    panel.grid.major.y = element_blank()
  )

# --- The 20 genes with the most negative fold change ---
down_20genes <- expresse %>%
  filter(padj < 0.05, log2FoldChange < -1) %>%
  arrange(log2FoldChange) %>% # most negative first
  head(20) %>%
  pull(Row.names)

# --- Expression values of those genes in the lesion and control samples ---
expr_datas_filtrado_down <- expr_datas %>%
  filter(
    `tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin"),
    Gene %in% down_20genes
  )

# --- Mean and standard deviation per gene and tissue type ---
expr_datasa_down <- expr_datas_filtrado_down %>%
  group_by(Gene, `tissue type:ch1`) %>%
  summarise(
    mean_expression = mean(Expression, na.rm = TRUE),
    sd_expression = sd(Expression, na.rm = TRUE),
    .groups = "drop"
  )

# --- Data for the chart: averages of the lesion group only ---
dados_para_plotar <- expr_datasa_down %>%
  filter(`tissue type:ch1` == "Psoriatic arthritis skin lesion")

# --- Horizontal bar chart, genes ordered by decreasing mean expression ---
ggplot(dados_para_plotar, aes(x = mean_expression, y = reorder(Gene, -mean_expression))) +
  geom_col(fill = "red", color = "black", width = 0.7) +
  labs(
    title = "Average Expression of the Main Downregulated Genes in Psoriatic arthritis skin lesion", # clearer title
    subtitle = "For lesion PsA vs control (GSE205748)",
    x = "Average Expression in the Group 'skin lesion'", # clearer axis label
    y = "Gene"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.y = element_text(size = 12),
    panel.grid.major.y = element_blank()
  )

# --- Build the gene x sample expression matrix used by the heatmap ---
# dplyr/tidyr/tibble verbs are namespace-qualified: `select` and `filter` are
# also exported by other packages attached in this report (for example
# AnnotationDbi), and which one wins depends on the attach order.
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange > 1))
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange < -1))
genes_interesse <- c(up_genes, down_genes)

# Long-format expression of the DEGs in the lesion and control samples
expr_filtrado <- expr_datas %>%
  dplyr::filter(`tissue type:ch1` %in% c("Psoriatic arthritis skin lesion", "Healthy control skin")) %>%
  dplyr::filter(Gene %in% genes_interesse)

# Wide format: one row per gene, one column per sample (missing pairs become 0)
matriz_contagem <- expr_filtrado %>%
  dplyr::select(Gene, Sample, Expression) %>%
  tidyr::pivot_wider(names_from = Sample, values_from = Expression, values_fill = 0) %>%
  tibble::column_to_rownames(var = "Gene")

# Keep only the rows of "sample_info" whose samples are in the final matrix,
# so the annotation corresponds exactly to the heatmap data
sample_info_filtrado <- subset(sample_info, rownames(sample_info) %in% colnames(matriz_contagem))


# Select the 20 genes with the highest mean expression across samples
media_dos_genes <- rowMeans(matriz_contagem)
media_ordenada <- sort(media_dos_genes, decreasing = TRUE)
top_20_genes_por_media <- names(head(media_ordenada, 20))
heatmapData_top20 <- as.matrix(matriz_contagem[top_20_genes_por_media, ])


# Create the annotation for the heatmap from the filtered sample information
annotation_data <- data.frame(
  Group = sample_info_filtrado$Group,
  row.names = rownames(sample_info_filtrado)
)

# Put the heatmap columns in the same order as the annotation rows;
# drop = FALSE keeps a matrix even if a single sample remains
heatmapData_top20 <- heatmapData_top20[, rownames(annotation_data), drop = FALSE]


# --- Heatmap of the top-20 matrix with ComplexHeatmap ---

# Row-wise Z-score: scale() works on columns, hence the double transpose
mat_scaled <- t(scale(t(heatmapData_top20)))

# Diverging colour function for Z-scores between -2 and 2 (reversed RdBu)
cores <- colorRamp2(c(-2, 0, 2), rev(RColorBrewer::brewer.pal(3, "RdBu")))

# Column annotation bar showing the group of every sample
ha_col <- HeatmapAnnotation(
  Group = annotation_data$Group,
  col = list(Group = c("lesion.PsA" = "#1f77b4", "control" = "#d62728"))
)

# Draw the heatmap: columns are split by group and clustered within each
# split, rows (genes) are clustered and labelled
Heatmap(
  mat_scaled,
  name = "Z-score",
  col = cores,
  top_annotation = ha_col,
  column_split = annotation_data$Group,
  cluster_columns = TRUE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  column_title = "Heatmap of the 20 most highly expressed DEGs (lesion PsA vs. control GSE205748)",
  heatmap_legend_param = list(title = "Z-score")
)

# Table of differentially expressed genes (adjusted p < 0.05, |log2FC| > 1)
# with their symbol and Entrez ID, used by the STRING/enrichment steps.
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)

# Convert the results object to a plain data frame (row names are kept)
de_genes <- as.data.frame(de_genes)

# Keep only the fold change and the adjusted p-value. Base indexing is used
# instead of select(), which is also exported by AnnotationDbi and may be
# masked depending on the package attach order.
de_genes <- de_genes[, c("log2FoldChange", "padj"), drop = FALSE]

# The row names are the gene symbols
gene_symbols <- rownames(de_genes)

# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Move the symbols from the row names into a regular column for the join
de_genes$Symbol <- rownames(de_genes)

# Left join: every DEG is kept, unmapped symbols get ENTREZID = NA
de_genes <- merge(de_genes, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)

# Keep one row per Entrez ID. Rows without an Entrez ID are all kept:
# duplicated() treats NA as an ordinary value, so without the is.na() guard
# every unmapped gene after the first would be discarded.
de_genes <- de_genes[is.na(de_genes$ENTREZID) | !duplicated(de_genes$ENTREZID), ]

# Alternative workflow: protein-protein interaction network via STRINGdb
# STRING handle for species 9606, keeping interactions with score >= 400
string_db <- STRINGdb$new(
  version = "12",
  species = 9606,
  score_threshold = 400,
  input_directory = ""
)

# Raise the download timeout to 10 minutes
options(timeout = 600)

# Attach STRING identifiers to the DE genes; rows without a match are removed
mapped_genes <- string_db$map(de_genes, "Symbol", removeUnmappedRows = TRUE)
## Warning:  we couldn't map to STRING 2% of your identifiers
# Interactions among the mapped proteins
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# Build and draw the undirected interaction graph
library(igraph)
g <- graph_from_data_frame(d = interactions, directed = FALSE)
plot(g)

# Community detection with the Louvain algorithm
clusters <- cluster_louvain(g)

# Redraw the graph with vertices coloured by community membership
plot(g, vertex.color = clusters$membership)

# GO over-representation analysis on the STRING-mapped gene symbols
# ont selects the ontology: "BP", "MF" or "CC"
enrich_result <- enrichGO(
  gene = mapped_genes$Symbol,
  keyType = "SYMBOL",
  OrgDb = org.Hs.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2
)

# Bar chart of the top 10 enriched terms
barplot(enrich_result, title = "GO Enrichment Analysis", showCategory = 10)
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?

# Gene-concept network of the top 10 enriched terms
cnetplot(enrich_result, showCategory = 10)
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Gene-by-term heatmap of the top 10 enriched terms
heatplot(enrich_result, showCategory = 10)

# Perform KEGG enrichment analysis.
# Symbols without an Entrez mapping carry NA in ENTREZID; drop them (and any
# repeated IDs) so that only valid identifiers are submitted.
kegg_gene_ids <- as.character(mapped_genes$ENTREZID)
kegg_gene_ids <- unique(kegg_gene_ids[!is.na(kegg_gene_ids)])
ekegg <- enrichKEGG(gene = kegg_gene_ids, organism = "hsa", pAdjustMethod = "BH", qvalueCutoff = 0.05)

# All KEGG plots live behind the same guard: the bar, network and heat plots
# need at least one enriched pathway just like the bubble chart does.
# Plots are print()ed explicitly because only the last value of a braced block
# is auto-printed.
if (is.null(ekegg) || nrow(as.data.frame(ekegg)) == 0) {
  print("No significantly enriched KEGG pathways were found with the criteria provided.")
} else {
  # The 15 most significant pathways, ordered by adjusted p-value (FDR)
  plot_data_kegg <- as.data.frame(ekegg) %>%
    arrange(p.adjust) %>%
    head(15)

  # Numeric Y position per pathway; reversed so the most significant is on top
  plot_data_kegg$y_pos <- rev(seq_along(plot_data_kegg$Description))

  # Legend breaks for bubble size, computed once and reused for the labels
  count_breaks <- unique(round(quantile(plot_data_kegg$Count, probs = c(0.25, 0.5, 0.75, 1.0))))

  # GO/STRING-style bubble chart
  kegg_bubble <- ggplot(plot_data_kegg, aes(x = Count, y = y_pos)) +
    # Connecting line from the Y axis to each bubble
    geom_segment(aes(x = 0, xend = Count, y = y_pos, yend = y_pos),
                 color = "gray80", linewidth = 0.6) +
    # Bubbles: size encodes gene count, colour encodes FDR
    geom_point(aes(size = Count, color = p.adjust), alpha = 0.9) +
    # FDR colour scale on a log10 axis with scientific-notation labels
    scale_color_gradientn(
      colors = c("darkblue", "steelblue", "lightblue", "lightgreen", "yellow", "orange", "darkred"),
      name = "FDR",
      trans = "log10",
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x)),
      limits = range(plot_data_kegg$p.adjust)
    ) +
    # Bubble size scale for the gene count
    scale_size_area(
      max_size = 18,
      name = "Gene count",
      breaks = count_breaks,
      labels = count_breaks
    ) +
    # Map the numeric Y positions back to the pathway descriptions
    scale_y_continuous(
      breaks = plot_data_kegg$y_pos,
      labels = plot_data_kegg$Description
    ) +
    labs(
      title = "KEGG Pathway Enrichment Analysis",
      x = "Gene count",
      y = "KEGG Pathway"
    ) +
    theme_minimal() +
    theme(
      axis.text.y = element_text(size = 12, face = "bold"),
      axis.title = element_text(size = 14, face = "bold"),
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      legend.position = "right",
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major.y = element_blank(), # no horizontal grid lines
      panel.grid.minor = element_blank(),
      panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2)
    )
  print(kegg_bubble)

  # Bar chart of the top 10 enriched pathways
  print(barplot(ekegg, showCategory = 10, title = "KEGG Enrichment Analysis"))
  ## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
  ## ✖ Problematic argument:
  ## • by = x
  ## ℹ Did you misspell an argument name?

  # Network graph of enriched terms
  print(cnetplot(ekegg, showCategory = 10))
  ## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
  ## increasing max.overlaps

  # Heatmap
  print(heatplot(ekegg, showCategory = 10))
}

9. Transcription factor motif analysis for the lesional PsA vs. control comparison

# Transcript annotation database (UCSC knownGene, hg38)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Exons grouped by gene, then collapsed with range() to the span between the
# first and last exon position of each gene
exons_by_gene <- exonsBy(txdb, by = "gene")
genes_info <- range(exons_by_gene)

# Transcript-level ranges
transcripts_info <- transcripts(txdb)

# Translate the STRING-mapped gene symbols into Entrez IDs
gene_entrez <- mapIds(
  org.Hs.eg.db,
  keys = mapped_genes$Symbol,
  keytype = "SYMBOL",
  column = "ENTREZID",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the gene spans whose name is one of those Entrez IDs
promoters_info <- subset(genes_info, names(genes_info) %in% gene_entrez)

# Promoter regions: 2 kb upstream of the TSS, restricted to the standard
# chromosomes and trimmed so no region runs past a chromosome end
promoters <- promoters_info %>%
  promoters(upstream = 2000, downstream = 0) %>%
  keepStandardChromosomes(pruning.mode = "coarse") %>%
  trim()

# Flatten the per-gene list into a single GRanges
promoters_gr <- unlist(promoters)

# Load the human motif matrices from the JASPAR2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))

# Lookup table: motif ID -> transcription factor (TF) name.
# vapply() guarantees a character vector whatever the number of motifs.
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = vapply(motifs, function(x) x@name, character(1)),
  stringsAsFactors = FALSE
)

# Scan the promoter regions for motif matches
motifHits <- matchMotifs(motifs, promoters_gr, genome = BSgenome.Hsapiens.UCSC.hg38)

# For each promoter, count how many motifs have a match in it.
# countOverlaps(promoters_gr, motifHits) only compared the promoters with the
# copy of their own ranges stored in motifHits, so it never counted motifs;
# the match information is in the assay matrix (promoters in rows, motifs in
# columns), so the per-promoter count is its row sum.
motif_counts <- rowSums(assays(motifHits)[[1]])

# Store the counts alongside the promoter ranges
promoters_gr$motif_counts <- as.numeric(motif_counts)

# Summary of motif counts
summary(promoters_gr$motif_counts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.4965  1.0000  1.0000
# Names of the rows of the match object
motif_names <- names(motifHits)

# Match matrix from the first assay: promoter regions in rows, motifs in columns
motif_data <- assays(motifHits)[[1]]

# Column names are the motif IDs
tf_names <- colnames(motif_data)

# Number of promoters matched by each motif.
# The sum must run over COLUMNS to give one value per motif. The previous
# rowSums() gave one value per promoter, which was then paired with the first
# motif IDs by truncation, producing a table of unrelated gene/TF pairs.
motif_counts <- colSums(motif_data)

# Check the dimensions of the motif_data object
dim(motif_data) # Number of rows (promoters) and columns (motifs)
## [1] 141 633
# Diagnostics kept for reference; lengths must agree, never be truncated to fit
tf_names_length <- length(tf_names)
motif_counts_length <- length(motif_counts)
motif_data_dims <- dim(motif_data)
stopifnot(tf_names_length == motif_counts_length)

# One row per motif. The column is still called tf_name for compatibility,
# although it holds the motif ID.
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = as.numeric(motif_counts),
  stringsAsFactors = FALSE
)

# Check the correspondence between motif IDs and the columns in motif_data
all(tf_names %in% colnames(motif_data)) # Should return TRUE
## [1] TRUE
# Sort by number of matched promoters and keep the top motifs
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 130) # Adjust the number as required.

# motif_id now really is the motif ID. The earlier Entrez-to-symbol mapping of
# the row names was removed: those row names were promoter gene IDs left over
# from rowSums(), not motif identifiers.
top_tf_summary$motif_id <- top_tf_summary$tf_name

# Attach the TF name of each motif (arrives as column tf_name.y)
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")

# Reorganise columns for better viewing; merge() re-sorts by key, so restore
# the ranking by count
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
motif2 <- motif2[order(motif2$motif_count, decreasing = TRUE), ]
rownames(motif2) <- NULL

# View
print(motif2)
##     motif_id           tf_name.y motif_count
## 1        VIM                TBXT         176
## 2       MMP1                 EN1         131
## 3      NAMPT               FOXF2         117
## 4     TRIM22               FOXD1         144
## 5       OLR1               FOXL1         139
## 6       NOS2               FOXI1         131
## 7      PPARG               HNF1A         153
## 8       SELL               NHLH1         127
## 9       GZMB                IRF1         125
## 10     SPON2                IRF2         127
## 11      GJB6            MAX::MYC         176
## 12  PPARGC1A               PPARG         135
## 13      SOD2                PAX4         137
## 14     IL17F                PAX6         147
## 15      GBP5                PBX1         133
## 16     MUCL1                RORA         148
## 17      CCR7         RORA(var.2)         197
## 18     ACKR2               RREB1         120
## 19   EFCAB13           RXRA::VDR         156
## 20      ICOS                ELK4         121
## 21  PPARGC1B                SOX9         138
## 22     CTLA4                 SRY         137
## 23      CTSK          TAL1::TCF3         123
## 24     IFIT3                 YY1         115
## 25    CYP1A1                 REL         128
## 26    ZNF483                RELA         112
## 27     KANK4         NR1H2::RXRA         117
## 28     DDIT3          NFIC::TLX1         139
## 29      TYMP             ZNF354C         121
## 30     TOMM7               HINFP         116
## 31      CD36                PDX1         181
## 32      MEFV                ELF5         159
## 33     HIF1A               STAT1         152
## 34       ALB                REST         131
## 35       EGF                CTCF         127
## 36     IFI16         GATA1::TAL1         122
## 37       HK2               STAT3         126
## 38     EOMES               TFCP2         111
## 39  SERPINB1          EWSR1-FLI1         143
## 40    FCGR1A              NFATC2         111
## 41      XAF1               HNF1B         133
## 42    FCGR3A               INSM1         155
## 43      IL22               FOXO3         128
## 44      DKK1          RARA::RXRA         164
## 45      CRB1               NR4A2         133
## 46       FOS               PLAG1         138
## 47     TBX21                ESR2         141
## 48     HYAL4         ARNT::HIF1A         134
## 49   SLC7A11                DUX4         145
## 50       MX1                FLI1         113
## 51      FRZB                 FOS         121
## 52      FUT2               FOSL2         166
## 53      IFI6               FOXH1         167
## 54     HERC6                HSF1         122
## 55    PTPN22                 JUN         113
## 56     GATA3          JUN(var.2)         126
## 57      GBP1         JUND(var.2)         121
## 58    IL36RN               MEF2C         135
## 59       GEM           MAF::NFE2         158
## 60      GJB2               NR2C2         138
## 61      IL37                NRF1         140
## 62   PLA2G4D              POU2F2         154
## 63     GPR35 SMAD2::SMAD3::SMAD4         126
## 64       GPT        STAT1::STAT2         120
## 65     CD274              TCF7L2         122
## 66     CXCL2              ZBTB33         170
## 67      IFNG               FOXP2         113
## 68      IL1B              SREBF2         119
## 69     IL2RA               THAP1         135
## 70       IL4                KLF5         149
## 71     CXCL8               DMRT3         207
## 72     CXCR2               FOXG1         122
## 73     IL12B              NFATC3         153
## 74   TNFRSF9              POU6F1         160
## 75     IL17A                SHOX         133
## 76      IL18                ALX3         144
## 77    CXCL10              BARHL2         114
## 78      JUND             BHLHE41         146
## 79     KLRB1               CENPB         159
## 80      ACP7               CREB3         120
## 81       LEP                 DBP         119
## 82       LYZ                ELF4         137
## 83      MMP7                ESX1         135
## 84      MMP9                ETV6         194
## 85      PER1                GCM1         138
## 86       PI3               GRHL1         168
## 87      ACP5                 GSC         161
## 88      SMOX                HEY2         128
## 89     SAMD9              HOXC11         157
## 90      PRF1                IRF8         166
## 91     DDX60                IRF9         149
## 92    ZNF415                 ISX         125
## 93   ADAMTS9                JDP2         158
## 94      IL21         JDP2(var.2)         134
## 95     CCND1               KLF13         183
## 96      RORC                LHX6         153
## 97      RPL7               MEF2B         160
## 98     RPL15               MEOX1         154
## 99     RPL41               MIXL1         124
## 100     RPS7                 MLX         126
## 101    RPS19              MLXIPL         138
## 102    RPS21                 MSC         130
## 103   S100A8                MSX1         135
## 104  S100A12             NEUROD2         115
## 105    BGLAP             NEUROG2         139
## 106    CCL20                NFIA         237
## 107    PRDM1                NFIX         154
## 108     NOD2              NKX2-3         139
## 109    IFIH1              NKX2-8         169
## 110    CSMD1              NKX6-1         108
## 111     BMP2              NKX6-2         138
## 112     SPP1                PAX7         111
## 113    STAT1              POU4F2         108
## 114    STAT3                 SP4         136
## 115     SYT1               SPDEF         134
## 116   TGFBR3                SPIC         160
## 117  ZC3H12A                TBX2         113
## 118    FOSL1               TBX20         120
## 119    APOL6               TBX21         145
## 120   SLC7A5               TFAP4         126
## 121   EFCAB7                TFEB         114
## 122   IL1F10              ZBTB7B         126
## 123    CAPS2              ZBTB7C         125
## 124    APOL1                ZIC1         130
## 125    RSAD2                ZIC3         137
## 126     CD3E              ZBTB18         128
## 127     OSMR                LBX2         141
## 128     CD28                MSX2         111
## 129     CD38              PHOX2A         121
## 130     CD63               PITX3         124

10. Comparison between non-lesional PsA and control samples

# Wald test to obtain contrast-specific results
dsd <- DESeq(dsa, test = "Wald", sfType = "poscount")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Read the group levels from the fitted object BEFORE building the contrast.
# They were previously assigned after results(), so the contrast relied on a
# `groups` vector left over from an earlier section.
groups <- levels(colData(dsd)$Group)
stopifnot(length(groups) >= 3)

# Contrast: third group level versus first group level, FDR-adjusted
r <- results(dsd, contrast = c("Group", groups[3], groups[1]), alpha = 0.05, pAdjustMethod = "fdr")

df <- as.data.frame(r)

# Filter only significant genes
sig_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) >= 1)

# Volcano plot of the significant genes
# Two-colour palette: index 1 = down (blue), index 2 = up (red); the previous
# palette is saved so it can be restored afterwards
old.pal <- palette(c("#00BFFF", "#FF3030")) # low-hi colors
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(log2FoldChange, -log10(padj), main = paste(groups[3], "vs", groups[1]),
       xlab = "log2FC", ylab = "-log10(Padj)", pch = 20, cex = 0.5)
  # Labels must come from sig_genes, the object that supplies the coordinates.
  # rownames(r) covers ALL genes, so its first entries were written next to
  # the significant points.
  text(log2FoldChange, -log10(padj), labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
# Overlay the same points coloured by direction:
# (sign + 3) / 2 maps -1 -> palette colour 1 (down) and +1 -> colour 2 (up)
with(subset(r, padj < 0.05 & abs(log2FoldChange) >= 1),
     points(log2FoldChange, -log10(padj), pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1))
legend("bottomleft", title = paste0("Padj<", 0.05), legend = c("down", "up"), pch = 20, col = 1:2)

# Plot only significant genes
# MD plot: log2 fold change against mean normalised count
par(mar = c(4, 4, 2, 1), cex.main = 1.5)
with(sig_genes, {
  plot(log10(baseMean), log2FoldChange,
       main = paste(groups[3], "vs", groups[1]),
       xlab = "log10(mean of normalized counts)", ylab = "log2FoldChange",
       pch = 20, col = (sign(log2FoldChange) + 3) / 2, cex = 1)
  text(log10(baseMean), log2FoldChange, labels = rownames(sig_genes), cex = 0.6, pos = 4)
})
legend("bottomleft", title = paste0("Padj<", 0.05), legend = c("down", "up"), pch = 20, col = 1:2)
abline(h = 0)

palette(old.pal) # restore palette

# Add gene symbols to the points on the graph
# Install the packages if you do not already have them
library(ggplot2)
library(ggrepel)

#' Volcano plot of differential expression results
#'
#' Classifies each gene as "Up", "Down" or "NS" from its adjusted p-value and
#' log2 fold change, and labels every non-"NS" gene with its row name.
#'
#' @param res Results table with `log2FoldChange` and `padj` columns and gene
#'   names as row names. Anything `as.data.frame()` can convert is accepted.
#' @param title Plot title.
#' @param padj_cutoff Adjusted p-value threshold (strict `<`). Default 0.05.
#' @param lfc_cutoff Absolute log2 fold change threshold (strict `>`).
#'   Default 1.
#' @return A ggplot object.
plotVolcano <- function(res, title = "Volcano Plot",
                        padj_cutoff = 0.05, lfc_cutoff = 1) {
  # ggplot2 needs a data frame; convert up front instead of relying on
  # implicit coercion of other table classes
  res <- as.data.frame(res)

  # Genes with missing padj or fold change can never be called significant
  is_sig <- !is.na(res$padj) & !is.na(res$log2FoldChange) &
    res$padj < padj_cutoff

  # rep() keeps the assignment valid for a zero-row table
  res$group <- rep("NS", nrow(res))
  res$group[is_sig & res$log2FoldChange > lfc_cutoff] <- "Up"
  res$group[is_sig & res$log2FoldChange < -lfc_cutoff] <- "Down"

  res$label <- ifelse(res$group != "NS", rownames(res), NA_character_)

  ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = group)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_hline(yintercept = -log10(padj_cutoff), linetype = "dashed") +
    geom_vline(xintercept = c(-lfc_cutoff, lfc_cutoff), linetype = "dashed") +
    scale_color_manual(values = c("Up" = "firebrick", "Down" = "dodgerblue", "NS" = "grey80")) +
    ggrepel::geom_text_repel(aes(label = label), size = 3, max.overlaps = Inf) +
    theme_minimal() +
    labs(title = title, x = "log2 Fold Change", y = "-log10 adjusted p-value", color = "Regulation")
}

# Example of function usage
plotVolcano(r, paste(groups[3], "vs", groups[1]))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 428 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Genes upregulated
up_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange >= 1))

# Genes downregulated
down_genes <- rownames(subset(r, padj < 0.05 & log2FoldChange <= -1))

# All DEGs (padj < 0.05 and |log2FC| >= 1)
de_genes <- c(up_genes, down_genes)

# drop = FALSE keeps the table shape when only one gene is selected; without
# it a matrix subset of a single row collapses to a vector and the genes end
# up as a column instead of a row
expr_datasa <- as.data.frame(expr_data[rownames(expr_data) %in% de_genes, , drop = FALSE])

# Convert expression data to long format for ggplot2
expr_datas <- expr_datasa
expr_datas$Gene <- rownames(expr_datas)
expr_datas <- pivot_longer(expr_datas, cols = -Gene, names_to = "Sample", values_to = "Expression")
expr_datas <- merge(expr_datas, pheno_data, by.x = "Sample", by.y = "geo_accession")

# Results ordered by adjusted p-value (at most the first 429 rows), joined to
# the expression values by row name. head() avoids the NA indices that
# order(...)[1:429] produces when the table has fewer than 429 rows.
expresse <- r[head(order(r$padj), 429), ]
expresse <- merge(as.data.frame(expresse), expr_datasa, by = 0, sort = FALSE)


library(dplyr)
library(tibble)
library(clusterProfiler)
library(ggplot2)


# Filter differentially expressed genes (adjusted p < 0.05 and |log2FC| > 1)
de_genes <- subset(r, padj < 0.05 & abs(log2FoldChange) > 1)

# Convert the filtered results into a plain data frame
de_genes <- data.frame(de_genes)

# Keep only the effect size and the adjusted p-value.
# dplyr::select() is called explicitly because AnnotationDbi also exports a
# select() generic that can mask the dplyr verb depending on load order.
de_genes <- de_genes %>% dplyr::select(log2FoldChange, padj)

# Row names are assumed to be gene symbols
gene_symbols <- rownames(de_genes)

# Convert gene symbols to Entrez IDs
gene_entrez_ids <- bitr(gene_symbols, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
# Expose the row names as a regular column so they can be used as a join key
de_genes$Symbol <- rownames(de_genes)

# Left join: every DE gene is kept, unmapped symbols get ENTREZID = NA
de_genes <- merge(de_genes, gene_entrez_ids, by.x = "Symbol", by.y = "SYMBOL", all.x = TRUE)

# Ensure unique symbols.
# De-duplicating on ENTREZID treated all NA values as duplicates of each other
# and silently discarded every unmapped gene except the first; de-duplicating
# on Symbol removes only the extra rows created by one-to-many mappings.
de_genes <- de_genes[!duplicated(de_genes$Symbol), ]

# Alternative workflow: protein-protein interactions via STRINGdb
# STRING handle for species 9606, keeping interactions with score >= 400
string_db <- STRINGdb$new(
  version = "12",
  species = 9606,
  score_threshold = 400,
  input_directory = ""
)

# Raise the download timeout to 10 minutes
options(timeout = 600)

# Attach STRING identifiers to the DE genes; rows without a match are removed
mapped_genes <- string_db$map(de_genes, "Symbol", removeUnmappedRows = TRUE)

# Interactions among the mapped proteins
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# igraph is attached for network visualisation
library(igraph)

11. Transcription factor motif analysis for the non-lesional PsA vs. control comparison

# Transcript annotation database (UCSC knownGene, hg38)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Exons grouped by gene, then collapsed with range() to the span between the
# first and last exon position of each gene
exons_by_gene <- exonsBy(txdb, by = "gene")
genes_info <- range(exons_by_gene)

# Transcript-level ranges
transcripts_info <- transcripts(txdb)

# Translate the STRING-mapped gene symbols into Entrez IDs
gene_entrez <- mapIds(
  org.Hs.eg.db,
  keys = mapped_genes$Symbol,
  keytype = "SYMBOL",
  column = "ENTREZID",
  multiVals = "first"
)
## 'select()' returned 1:1 mapping between keys and columns
# Keep only the gene spans whose name is one of those Entrez IDs
promoters_info <- subset(genes_info, names(genes_info) %in% gene_entrez)

# Promoter regions: 2 kb upstream of the TSS, restricted to the standard
# chromosomes and trimmed so no region runs past a chromosome end
promoters <- promoters_info %>%
  promoters(upstream = 2000, downstream = 0) %>%
  keepStandardChromosomes(pruning.mode = "coarse") %>%
  trim()

# Flatten the per-gene list into a single GRanges
promoters_gr <- unlist(promoters)

# Load the human motif matrices from the JASPAR2020 database
motifs <- getMatrixSet(JASPAR2020, opts = list(species = "Homo sapiens"))

# Lookup table: motif ID -> transcription factor (TF) name.
# vapply() guarantees a character vector whatever the number of motifs.
motif_info <- data.frame(
  motif_id = names(motifs),
  tf_name = vapply(motifs, function(x) x@name, character(1)),
  stringsAsFactors = FALSE
)

# Scan the promoter regions for motif matches
motifHits <- matchMotifs(motifs, promoters_gr, genome = BSgenome.Hsapiens.UCSC.hg38)

# For each promoter, count how many motifs have a match in it.
# countOverlaps(promoters_gr, motifHits) only compared the promoters with the
# copy of their own ranges stored in motifHits, so it never counted motifs;
# the match information is in the assay matrix (promoters in rows, motifs in
# columns), so the per-promoter count is its row sum.
motif_counts <- rowSums(assays(motifHits)[[1]])

# Store the counts alongside the promoter ranges
promoters_gr$motif_counts <- as.numeric(motif_counts)

# Summary of motif counts
summary(promoters_gr$motif_counts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
# Names of the rows of the match object
motif_names <- names(motifHits)

# Match matrix from the first assay: promoter regions in rows, motifs in columns
motif_data <- assays(motifHits)[[1]]

# Column names are the motif IDs
tf_names <- colnames(motif_data)

# Number of promoters matched by each motif.
# The sum must run over COLUMNS to give one value per motif. The previous
# rowSums() gave one value per promoter, which was then paired with the first
# motif IDs by truncation, producing a table of unrelated gene/TF pairs.
motif_counts <- colSums(motif_data)

# Check the dimensions of the motif_data object
dim(motif_data) # Number of rows (promoters) and columns (motifs)
## [1]   1 633
# Diagnostics kept for reference; lengths must agree, never be truncated to fit
tf_names_length <- length(tf_names)
motif_counts_length <- length(motif_counts)
motif_data_dims <- dim(motif_data)
stopifnot(tf_names_length == motif_counts_length)

# One row per motif. The column is still called tf_name for compatibility,
# although it holds the motif ID.
motif_summary <- data.frame(
  tf_name = tf_names,
  motif_count = as.numeric(motif_counts),
  stringsAsFactors = FALSE
)

# Check the correspondence between motif IDs and the columns in motif_data
all(tf_names %in% colnames(motif_data)) # Should return TRUE
## [1] TRUE
# Sort by number of matched promoters and keep the top motifs
tf_summary <- motif_summary[order(motif_summary$motif_count, decreasing = TRUE), ]
top_tf_summary <- head(tf_summary, 130) # Adjust the number as required.

# motif_id now really is the motif ID. The earlier Entrez-to-symbol mapping of
# the row names was removed: those row names were promoter gene IDs left over
# from rowSums(), not motif identifiers.
top_tf_summary$motif_id <- top_tf_summary$tf_name

# Attach the TF name of each motif (arrives as column tf_name.y)
motif1 <- merge(top_tf_summary, motif_info, by.x = "tf_name", by.y = "motif_id")

# Reorganise columns for better viewing; merge() re-sorts by key, so restore
# the ranking by count
motif2 <- motif1[, c("motif_id", "tf_name.y", "motif_count")]
motif2 <- motif2[order(motif2$motif_count, decreasing = TRUE), ]
rownames(motif2) <- NULL

# View
print(motif2)
##   motif_id tf_name.y motif_count
## 1     SPP1     FOXF2          99